#!/bin/sh
#
#+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#
#
# $SGE_ROOT/util/master_template
#
# DO NOT EDIT THIS FILE - this file is used as a template
# Don't change the markers #+-#+-#+-# and "#-#-#-#" , they will be removed
#
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-
#
# SGE/SGEEE startup script
#
# (c) 2004 Sun Microsystems, Inc. Use is subject to license terms.  

#
# This script can be called with the following arguments:
#
#       start       start qmaster, scheduler or shadowd 
#       stop        Terminates scheduler and qmaster if we are on the master
#                   machine.
#       -qmaster    only starts qmaster and scheduler
#       -shadowd    start shadowd if found in the "shadow_masters" file
#       -migrate    shuts down qmaster and scheduler if they are running
#                   on another host and start the daemons on this host
#
# If the file "primary_qmaster" in the $SGE_ROOT/$SGE_CELL/common
# exists and it contains the hostname of the current machine and qmaster
# is running on another host it will be shut down and started on this host
#
# Unix commands which may be used in this script:
#    cat cut tr ls grep awk sed basename
#
# This script requires the script $SGE_ROOT/util/arch
#

# Fixed search path for the Unix commands listed above. Grid Engine programs
# are not found through PATH; they are called with explicit paths
# ($SGE_ROOT/util/arch, $bin_dir/..., $utilbin_dir/...).
PATH=/bin:/usr/bin:/sbin:/usr/sbin

#---------------------------------------------------------------------------
# The following lines provide the necessary info for adding a startup script
# according to the Linux Standard Base Specification (LSB) 1.0.0 which can
# be found at:
#
#    http://www.linuxbase.org/spec/gLSB/gLSB/sysinit.html
#
# See also
#
#    http://www.suse.de/~grimmer/Package-Conventions/index.html
#
### BEGIN INIT INFO
# Provides:       sgemaster 
# Required-Start: $network
# Required-Stop:
# Default-Start:  3 5
# Default-Stop: 0 1 2 6
# Description:  start Grid Engine qmaster, schedd, shadowd
### END INIT INFO
#---------------------------------------------------------------------------

# Grid Engine environment. The GEN* values are template placeholders
# (presumably substituted when this template is installed - the assignment
# text is therefore left untouched).
SGE_ROOT=GENROOT; export SGE_ROOT
SGE_CELL=GENCELL; export SGE_CELL
SGE_QMASTER_PORT=GENSGE_QMASTER_PORT; export SGE_QMASTER_PORT
SGE_EXECD_PORT=GENSGE_EXECD_PORT; export SGE_EXECD_PORT

unset CODINE_ROOT GRD_ROOT COD_CELL GRD_CELL

# Append $SGE_ROOT/lib/$ARCH to the shared library search path. The name of
# the variable holding that path is reported by "util/arch -lib".
ARCH=`"$SGE_ROOT/util/arch"`
shlib_path_name=`"$SGE_ROOT/util/arch" -lib`
if [ "x$shlib_path_name" != x ]; then
   # The escaped "$" signs are expanded by eval on the right hand side of an
   # assignment, where no word splitting takes place - so values containing
   # blanks or glob characters are handled correctly.
   eval old_value=\$$shlib_path_name
   if [ "x$old_value" = x ]; then
      eval $shlib_path_name=\$SGE_ROOT/lib/\$ARCH
   else
      eval $shlib_path_name=\$old_value:\$SGE_ROOT/lib/\$ARCH
   fi
   export $shlib_path_name
fi


#---------------------------------------------------------------------------
# Shutdown
#    Terminate the program named $1 whose pid is stored in the file $2.
#
#    $1 - program name, passed to "$utilbin_dir/checkprog"
#    $2 - pid file
#
#    Nothing is done if the pid file does not exist or is empty. Otherwise
#    SIGTERM is sent up to 6 times, waiting 2, 3, ... 7 seconds after each
#    signal. The function returns as soon as checkprog no longer reports
#    pid $pid as program $1. SIGKILL is only sent if checkprog still
#    reports the program after the last wait.
#
#    Uses the global $utilbin_dir. All variables are global on purpose:
#    "local" is not available in every /bin/sh.
#
Shutdown()
{
   name=$1
   pidfile=$2

   if [ ! -f "$pidfile" ]; then
      return
   fi

   pid=`cat "$pidfile"`
   if [ "x$pid" = x ]; then
      # empty pid file - there is no process we could signal
      return
   fi

   maxretries=6
   i=0
   while [ $i -lt $maxretries ]; do
      # $pid is deliberately unquoted: surrounding white space from the pid
      # file is dropped this way
      if "$utilbin_dir/checkprog" $pid "$name" > /dev/null; then
         kill $pid
      else
         return
      fi
      sleep `expr 2 + $i`
      i=`expr $i + 1`
   done

   # Check once more before using SIGKILL: the process may have exited
   # during the last wait and the pid might already belong to someone else.
   if "$utilbin_dir/checkprog" $pid "$name" > /dev/null; then
      kill -9 $pid
   fi
}


#---------------------------------------------------------------------------
# QmasterSpoolDir
#    Echo the qmaster spool directory, i.e. the second field of the
#    "qmaster_spool_dir" line in $SGE_ROOT/$SGE_CELL/common/bootstrap.
#    An empty line is printed if there is no such entry.
#
#    Only a line whose first field is exactly "qmaster_spool_dir" is used,
#    so comment lines mentioning the key are ignored.
#
QmasterSpoolDir()
{
   qma_spool_dir=`awk '$1 == "qmaster_spool_dir" { print $2; exit }' \
                      "$SGE_ROOT/$SGE_CELL/common/bootstrap"`
   echo "$qma_spool_dir"
}

#---------------------------------------------------------------------------
# CheckIfQmasterHost
#    Echo "true" if the hostname given in $1 is identical to the content of
#    the file $SGE_ROOT/$SGE_CELL/common/act_qmaster, else echo "false".
#    The comparison is an exact (case sensitive) string comparison.
#
CheckIfQmasterHost()
{
   host=$1
   act_qmaster=`cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster"`

   # the "x" prefix keeps test(1) from misreading values like "!" or "("
   if [ "x$host" = "x$act_qmaster" ]; then
      echo true
   else
      echo false
   fi
}

#---------------------------------------------------------------------------
# CheckIfPrimaryQmasterHost
#    Echo "true" if the file $SGE_ROOT/$SGE_CELL/common/primary_qmaster
#    exists and its content is identical to the hostname given in $1.
#    Echo "false" otherwise (including the case that the file is missing).
#
CheckIfPrimaryQmasterHost()
{
   host=$1

   fname=$SGE_ROOT/$SGE_CELL/common/primary_qmaster
   is_primary=false

   if [ -f "$fname" ]; then
      primary_host=`cat "$fname"`
      if [ "x$host" = "x$primary_host" ]; then
         is_primary=true
      fi
   fi

   echo $is_primary
}


#---------------------------------------------------------------------------
# CheckIfShadowMasterHost
#    Check if the hostname given in $1 is contained in the file
#    $SGE_ROOT/$SGE_CELL/common/shadow_masters.
#
#    The result is NOT echoed: the global variable "shadow_host" is set to
#    "true" or "false". It is "false" if the file does not exist or if $1
#    is empty.
#
#    NOTE(review): the name is used as a case insensitive grep pattern, so
#    a name which is a substring of an entry matches as well (e.g. "node1"
#    matches "node10"). Kept as is, since installations may rely on short
#    names matching fully qualified entries - confirm before tightening.
#
CheckIfShadowMasterHost()
{
   host=$1

   fname=$SGE_ROOT/$SGE_CELL/common/shadow_masters
   shadow_host="false"

   # Without a pattern grep would match every line
   if [ "x$host" = x ]; then
      return
   fi

   if [ -f "$fname" ]; then
      # "> /dev/null 2>&1" (in this order) silences stdout and stderr
      if grep -i "$host" "$fname" > /dev/null 2>&1; then
         shadow_host="true"
      fi
   fi
}

#---------------------------------------------------------------------------
# GetPathToBinaries
#    Echo the directory containing the Grid Engine binaries on this system.
#
#    The "binary_path" entry of $SGE_ROOT/$SGE_CELL/common/bootstrap is
#    used as starting point. The check is fulfilled if the "qstat" binary
#    is found either directly in that directory or in its subdirectory
#    named like the output of $SGE_ROOT/util/arch.
#
#    Echo "none" if the binary path can't be determined, i.e. if the
#    bootstrap file or its binary_path entry is missing or if qstat is not
#    found in one of the two places.
#
GetPathToBinaries()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap

   base=none

   if [ -f "$cfgname" ]; then
      cfg_path=`awk '$1 == "binary_path" { print $2; exit }' "$cfgname"`
      if [ "x$cfg_path" != x ]; then
         if [ -f "$cfg_path/qstat" ]; then
            base=$cfg_path
         elif [ -f "$SGE_ROOT/util/arch" ]; then
            arch=`"$SGE_ROOT/util/arch"`
            if [ -f "$cfg_path/$arch/qstat" ]; then
               base=$cfg_path/$arch
            fi
         fi
      fi
   fi

   echo "$base"
}


#---------------------------------------------------------------------------
# GetAdminUser
#    Echo the name of the admin user, i.e. the second field of the
#    "admin_user" line in $SGE_ROOT/$SGE_CELL/common/bootstrap.
#
#    Echo "root" if admin user retrieval fails: the bootstrap file or the
#    admin_user entry is missing, or the entry is "none" (in any case).
#
GetAdminUser()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
   user=none

   if [ -f "$cfgname" ]; then
      user=`awk '$1 == "admin_user" { print $2; exit }' "$cfgname"`
   fi

   # "x" prefix: also matches the empty string (entry not found)
   case x`echo "$user" | tr "A-Z" "a-z"` in
      x|xnone)
         user=root
         ;;
   esac

   echo "$user"
}

#---------------------------------------------------------------------------
# GetPathToUtilbin
#    Echo the path to the binaries in utilbin, which is
#    $SGE_ROOT/utilbin/<arch> with <arch> being the output of
#    $SGE_ROOT/util/arch.
#    The check is fulfilled if the "gethostname" binary exists there.
#    Echo "none" if the path can't be determined (no arch script or no
#    gethostname binary).
#
GetPathToUtilbin()
{
   base=none

   if [ -f "$SGE_ROOT/util/arch" ]; then
      utilbindir=$SGE_ROOT/utilbin

      arch=`"$SGE_ROOT/util/arch"`
      if [ -f "$utilbindir/$arch/gethostname" ]; then
         base=$utilbindir/$arch
      fi
   fi

   echo "$base"
}

#---------------------------------------------------------------------------
# CheckRunningQmaster
#    Check if sge_qmaster is running.
#
#    The host named in $SGE_ROOT/$SGE_CELL/common/act_qmaster is contacted
#    with "qping" on port $SGE_QMASTER_PORT. If that variable is empty the
#    port is looked up with "$utilbin_dir/getservbyname". The check is
#    tried up to 10 times with a pause of 3 seconds after each failure.
#
#    Returns 0 if qmaster answered. If it did not, a message is printed to
#    stdout and 1 is returned.
#
#    Uses the globals $bin_dir and $utilbin_dir.
#
CheckRunningQmaster()
{
   masterhost=`cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster"`
   running=false
   loop=0

   if [ "x$SGE_QMASTER_PORT" = x ]; then
      SGE_QMASTER_PORT=`"$utilbin_dir/getservbyname" -number sge_qmaster`
   fi

   while [ "$running" = false ] && [ "$loop" -ne 10 ]; do
      if "$bin_dir/qping" -info "$masterhost" "$SGE_QMASTER_PORT" qmaster 1 > /dev/null; then
         running=true
      else
         sleep 3
         loop=`expr $loop + 1`
      fi
   done

   if [ "$running" = false ]; then
      echo
      echo "sge_qmaster didn't start!"
      echo "Please check the messages file"
      echo
      return 1
   fi

   return 0
}

#---------------------------------------------------------------------------
# usage
#    Print the command line help to stdout and leave the script with
#    exit status 1.
#
usage()
{
   # quoted delimiter: the text is printed literally, nothing is expanded
   cat <<'EOF'
Grid Engine start/stop script. Valid parameters are:

   (no parameters): start qmaster and execution daemon if applicable
   "start"        dto.
   "stop"         shutdown local Grid Engine processes and jobs
   "-qmaster"     only start/stop qmaster and scheduler (if applicable)
   "-shadowd"     only start/stop shadowd (if applicable)
   "-migrate"     shutdown qmaster/scheduler if it's running on another
                    host and restart it on this host
                    Migration only works if this host is an admin host

Only one of the parameters "start", "stop" or "softstop" is allowed.
Only one of the parameters beginning  with "-" is allowed.

Default argument is "start" for all components.
Default for "stop" is shutting down all components.

EOF
   exit 1
}


#---------------------------------------------------------------------------
# MAIN Procedure
#
# Command line parsing. Sets the following flags ("true"/"false"):
#    startup          start (default) or stop the daemons
#    qmaster          handle qmaster and scheduler
#    shadowd          handle shadowd
#    migrate_qmaster  -migrate was given
#    softstop         "softstop" was given
#

if [ "$#" -gt 2 ]; then
   usage
fi

case "$1" in
   -h|help)
      usage
      ;;
esac

startup=true
qmaster=true
shadowd=true
qstd=false
migrate_qmaster=false
softstop=false

# "for i" without "in" loops over the positional parameters as they were
# given: no word splitting, no file name expansion, and - unlike "$@" in
# some old shells - no empty word if there are no parameters at all
for i
do
   case "$i" in
      start)
         startup=true
         ;;
      stop)
         startup=false
         ;;
      softstop)
         startup=false
         softstop=true
         ;;
      -qmaster)
         qmaster=true
         shadowd=false
         ;;
      -shadowd)
         qmaster=false
         shadowd=true
         ;;
      -migrate)
         migrate_qmaster=true
         qmaster=true
         shadowd=false
         ;;
      *)
         usage
         ;;
   esac
done

# Locate the Grid Engine binaries. An empty result is treated like "none",
# since nothing below works without these directories.
bin_dir=`GetPathToBinaries`
case "x$bin_dir" in
   x|xnone)
      echo "can't determine path to Grid Engine binaries"
      exit 1
      ;;
esac

utilbin_dir=`GetPathToUtilbin`
case "x$utilbin_dir" in
   x|xnone)
      echo "can't determine path to Grid Engine utility binaries"
      exit 1
      ;;
esac

# HOST:   name of this host as printed by "gethostname -aname"
# UQHOST: the same name up to the first "."
HOST=`"$utilbin_dir/gethostname" -aname`
if [ "x$HOST" = x ]; then
   # all host checks below depend on the name of this host
   echo "can't determine the name of this host"
   exit 1
fi
UQHOST=`echo "$HOST" | cut -f1 -d.`

qmaster_spool_dir=`QmasterSpoolDir`

# sets the global variable "shadow_host"
CheckIfShadowMasterHost "$HOST"

if [ "$startup" = true ]; then

   # qmaster_host:         "true" if the act_qmaster file names this host
   # primary_qmaster_host: "true" if the primary_qmaster file names this host
   qmaster_host=`CheckIfQmasterHost "$HOST"`
   primary_qmaster_host=`CheckIfPrimaryQmasterHost "$HOST"`

   # According to act_qmaster the qmaster belongs to another host. Take it
   # over if this is the primary qmaster host or if -migrate was given.
   take_over=false
   if [ "$qmaster" = true ] && [ "$qmaster_host" = false ]; then
      if [ "$primary_qmaster_host" = true ] || [ "$migrate_qmaster" = true ]; then
         take_over=true
      fi
   fi

   if [ "$take_over" = true ]; then
      actual_qmaster_host=`cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster"`
      echo "   shutting down qmaster and scheduler on host \"$actual_qmaster_host\" ..."
      qconf_output=`"$bin_dir/qconf" -ks 2>&1 | grep "denied"`
      if [ "x$qconf_output" != x ]; then
         echo "   denied: host \"$HOST\" is no admin host."
         exit 1
      fi
      # "> /dev/null 2>&1" (in this order) silences stdout and stderr
      "$bin_dir/qconf" -km > /dev/null 2>&1
      # if qconf still gets the configuration, qmaster is still running
      qmaster_sconf_info=`"$bin_dir/qconf" -sconf 2> /dev/null | grep "qmaster_spool_dir"`
      if [ "x$qmaster_sconf_info" != x ]; then
         echo "   qmaster and scheduler still alive. Cannot migrate qmaster."
         exit 1
      fi
      qmaster_host=true
   fi

   if [ "$qmaster" = true ]; then
      if [ "$qmaster_host" = true ]; then
         echo "   starting sge_qmaster"
         "$bin_dir/sge_qmaster"
         CheckRunningQmaster
         echo "   starting sge_schedd"
         "$bin_dir/sge_schedd"
      elif [ "$qmaster_host" = false ]; then
         echo
         echo "sge_qmaster didn't start!"
         echo "This is not a qmaster host!"
         echo "Please, check your act_qmaster file!"
         echo
      fi
   fi

   if [ "$shadowd" = true ] && [ "$shadow_host" = true ]; then
      # The pid file name contains either the full or the unqualified
      # host name - look at both.
      pidfile=$qmaster_spool_dir/shadowd_${HOST}.pid
      UQpidfile=$qmaster_spool_dir/shadowd_${UQHOST}.pid

      for f in "$pidfile" "$UQpidfile"; do
         if [ -f "$f" ]; then
            pid=`cat "$f"`
            # $pid is deliberately unquoted: surrounding white space from
            # the pid file is dropped this way
            if [ "x$pid" != x ] &&
               "$utilbin_dir/checkprog" $pid sge_shadowd > /dev/null; then
               echo "   found running sge_shadowd - not starting"
               exit 1
            fi
         fi
      done

      # No pid file (e.g. very first start) or only stale pid files:
      # shadowd has to be started.
      echo "   starting sge_shadowd"
      "$bin_dir/sge_shadowd"
   fi
else
   # "-qmaster" sets shadowd=false: shadowd is left alone in that case
   if [ "$shadowd" = true ] && [ "$shadow_host" = true ]; then
      # Send SIGTERM to shadowd
      echo "   Shutting down Grid Engine shadowd"
      if [ -f "$qmaster_spool_dir/shadowd_${UQHOST}.pid" ]; then
         Shutdown sge_shadowd "$qmaster_spool_dir/shadowd_${UQHOST}.pid"
      elif [ -f "$qmaster_spool_dir/shadowd_${HOST}.pid" ]; then
         Shutdown sge_shadowd "$qmaster_spool_dir/shadowd_${HOST}.pid"
      fi
   fi

   if [ "$qmaster" = true ]; then
      if [ "`CheckIfQmasterHost "$HOST"`" = true ]; then
         # Send SIGTERM to scheduler
         echo "   Shutting down Grid Engine scheduler"
         Shutdown sge_schedd "$qmaster_spool_dir/schedd/schedd.pid"

         # Send SIGTERM to qmaster
         echo "   Shutting down Grid Engine qmaster"
         Shutdown sge_qmaster "$qmaster_spool_dir/qmaster.pid"
      fi
   fi

fi
