#!/bin/bash
#
# torque        This script is a wrapper around the torque daemons to easily manage Torque.
#
# chkconfig: 35 96 5
# description: This script is a wrapper around the torque daemons to easily manage Torque.
#
# Usage: torque {start|stop|cluster-start|cluster-stop|cluster-restart|restart|
#                status|reconfigure|config-mom|config-nodes|config-server|config-db}
#
# The interpreter is bash rather than sh because the script relies on
# bash-only constructs ([[ ]], &>, arrays).

# Source the library functions (status, killproc, echo_success, ...)
. /etc/rc.d/init.d/functions

SERVERSPOOLDIR=/var/spool/torque
NODE_INIT=/etc/beowulf/init.d/90torque

# Start from a known state so values inherited from the caller's environment
# cannot influence the exit code or the warning summary.
RET=0
WARN=

# Check if memlock is set in /etc/security/limits.conf
# Behave like PAM and respect the last entry in limits.conf
memlock=
if [[ -r /etc/security/limits.conf ]]; then
  memlock=$(awk '/^[^#|.]*memlock/ { print $4 }' /etc/security/limits.conf | tail -1)
fi

# Apply memlock limit only if the value is a plain number or 'unlimited'
if [[ "$memlock" =~ ^[0-9]+$ || "$memlock" == 'unlimited' ]]; then
  ulimit -l "$memlock"
fi

#######################################
# Stop a service if it is currently running.
# Arguments: $1 - name of the init script under /etc/init.d
# Outputs:   nothing (all output of status/stop is discarded)
# Returns:   exit status of the stop command, or 0 if the service
#            was not running
#######################################
stop_service() {
  if status "$1" &> /dev/null; then
    "/etc/init.d/$1" stop &> /dev/null
  fi
}

# let see how we were called
case "$1" in
  start)
    echo "Starting TORQUE Services: "
    # Generate the nodes file first when it is missing or empty.
    if [[ ! -s "${SERVERSPOOLDIR}/server_priv/nodes" ]]; then
      "$0" config-nodes
    fi
    echo -n "Checking TORQUE Node Startup Script: "
    # A missing node startup script is only a warning here; cluster-start
    # treats it as fatal.
    if [[ -x "$NODE_INIT" ]]; then
      echo_success
    else
      echo_warning
    fi
    echo
    # Report a failure of either daemon, not just the last one started.
    /etc/init.d/pbs_server start || RET=$?
    /etc/init.d/pbs_sched start || RET=$?
    ;;

  cluster-start)
    echo "Starting Cluster wide TORQUE Services: "
    echo -n "Checking TORQUE Node Startup Script: "
    if [[ ! -x "$NODE_INIT" ]]; then
      echo_failure
      echo
      echo "Error: ${NODE_INIT} is not executable."
      exit 1
    fi
    echo_success
    echo
    # Only start the head-node daemons that are not already running.
    if ! status pbs_sched &> /dev/null; then
      /etc/init.d/pbs_sched start
    fi
    if ! status pbs_server &> /dev/null; then
      /etc/init.d/pbs_server start
    fi
    echo -n "Starting Compute Node Daemons: "
    # Word splitting of the bpstat output is intentional: one node per word.
    for node in $(bpstat -n allup); do
      if ! NODE=$node "$NODE_INIT" &> /dev/null; then
        echo -n "Node $node unable to start: "
        echo_warning
        echo
        WARN=1
      fi
    done
    if [[ -n "$WARN" ]]; then
      echo -n "Some nodes were unable to start the daemon: "
      echo_warning
    else
      echo_success
    fi
    echo
    # Per-node failures are reported as warnings only.
    RET=0
    ;;

  stop)
    echo "Shutting down TORQUE Services: "
    /etc/init.d/pbs_sched stop
    /etc/init.d/pbs_server stop
    # Exit status follows the pbs_server stop.
    RET=$?
    ;;

  cluster-stop)
    echo "Shutting down Cluster wide TORQUE Services: "
    echo -n "Shutting down Cluster wide MOMs: "
    # NOTE(review): presumably skill on the master reaches the MOMs on the
    # compute nodes -- confirm against the cluster's process model.
    if skill -9 pbs_mom; then
      echo_success
    else
      echo_failure
    fi
    echo
    /etc/init.d/pbs_sched stop
    /etc/init.d/pbs_server stop
    RET=$?
    ;;

  status)
    # pbs_server and pbs_sched are the daemons this wrapper starts locally,
    # so either one being down yields a non-zero exit. pbs_mom's status is
    # printed but does not affect the exit code.
    /etc/init.d/pbs_server status || RET=$?
    /etc/init.d/pbs_mom status
    /etc/init.d/pbs_sched status || RET=$?
    ;;

  restart)
    "$0" stop
    "$0" start
    RET=$?
    ;;

  cluster-restart)
    "$0" cluster-stop
    "$0" cluster-start
    RET=$?
    ;;

  reconfigure)
    # Run every configuration step, and report failure if any step failed.
    for step in config-nodes config-mom config-server config-db; do
      "$0" "$step" || RET=$?
    done
    ;;

  config-nodes)
    echo -n "Configuring Nodes File: "
    stop_service pbs_sched
    stop_service pbs_server
    nodes_file="${SERVERSPOOLDIR}/server_priv/nodes"
    # Keep the previous nodes file as a backup before regenerating it.
    if [[ -e "$nodes_file" ]]; then
      mv "$nodes_file" "${nodes_file}.oldconfig"
    fi
    # Word splitting of the bpstat output is intentional: one node per word.
    for node in $(bpstat -n); do
      # Use private names; assigning to HOSTNAME would clobber the shell's
      # own variable for every later command.
      node_host=$(getent hosts ".${node}" | awk '{print $2}')
      num_procs=$(beostat -N "$node" -c | awk -F" " '/num/ {print $4}')
      # Skip nodes lacking a CPU count or a hostname instead of writing an
      # incomplete entry into the nodes file.
      if [[ -z "$num_procs" || -z "$node_host" ]]; then
        echo
        echo -n "No Information for Node $node"
        echo_warning
        WARN=1
      else
        echo "${node_host} np=${num_procs}" >> "$nodes_file"
      fi
    done
    if [[ -n "$WARN" ]]; then
      echo
      echo -n "Be Sure All Nodes are Booted and Rerun $1"
      echo_warning
    else
      echo_success
    fi
    echo
    RET=0
    ;;

  config-mom)
    echo -n "Configuring TORQUE MOM: "
    mom_config="${SERVERSPOOLDIR}/mom_priv/config"
    CLUSTERDEV=$(beoconfig interface)
    if [[ "${CLUSTERDEV}" == "none" ]]; then
      # Fail before touching the existing config so it stays in place.
      echo
      echo -n "Cannont Configure MOM: Configure Cluster Interface First"
      echo_failure
      echo
      RET=1
    else
      # Keep the previous config as a backup before writing the new one.
      if [[ -e "$mom_config" ]]; then
        mv "$mom_config" "${mom_config}.oldconfig"
      fi
      echo "\$pbsserver master" >> "$mom_config"
      echo "\$usecp *:/home /home" >> "$mom_config"
      echo_success
      echo
      RET=0
    fi
    ;;

  config-server)
    echo -n "Configuring TORQUE server_name: "
    server_name_file="${SERVERSPOOLDIR}/server_name"
    if [[ -e "$server_name_file" ]]; then
      mv "$server_name_file" "${server_name_file}.oldconfig"
    fi
    /bin/hostname >> "$server_name_file"
    echo_success
    echo
    RET=0
    ;;

  config-db)
    echo -n "Configuring TORQUE's Database: "
    stop_service pbs_sched
    stop_service pbs_server
    # Do jobs exist in the queue?  If so, move them to jobs.oldconfig
    # An unmatched glob stays literal, so test the first entry for existence.
    old_jobs=( "${SERVERSPOOLDIR}"/server_priv/jobs/* )
    if [[ -e "${old_jobs[0]}" || -L "${old_jobs[0]}" ]]; then
      mkdir -m 750 -p "${SERVERSPOOLDIR}/server_priv/jobs.oldconfig"
      mv -f -- "${old_jobs[@]}" "${SERVERSPOOLDIR}/server_priv/jobs.oldconfig/"
    fi
    if [[ -e "${SERVERSPOOLDIR}/server_priv/serverdb" ]]; then
      mv "${SERVERSPOOLDIR}/server_priv/serverdb" \
        "${SERVERSPOOLDIR}/server_priv/serverdb.oldconfig"
    fi
    # NOTE(review): presumably torque.setup leaves a pbs_server running,
    # which is why it is killed and its lock file removed -- confirm.
    /usr/sbin/torque.setup root &> /dev/null
    killproc pbs_server
    rm -f /var/lock/subsys/pbs_server
    echo_success
    echo
    RET=0
    ;;

  *)
    echo "Usage: torque {start|stop|cluster-start|cluster-stop|cluster-restart|restart|status|reconfigure|config-mom|config-nodes|config-server|config-db}"
    exit 1
    ;;
esac

exit "$RET"