[torqueusers] Problem with PBS_NODEFILE

Albino Aveleda bino at coc.ufrj.br
Thu Mar 15 13:16:15 MDT 2007


Hi,

I submitted the MPI job below on a cluster with 32 nodes, where each node
has two CPUs. In my job I asked Torque for 8 CPUs (nodes=4:ppn=2), but the
PBS_NODEFILE contains only the two CPUs of the first node. My Torque
configuration is below.

What am I doing wrong? I can't find where the problem is.

Regards,
Bibo

+++ job +++
#PBS -l nodes=4:ppn=2
#PBS -l walltime=01:00:00
#PBS -j oe
#PBS -N mpi

# Torque job script: boot an MPD ring on the allocated hosts, run ./testmpi
# with one MPI process per allocated CPU slot, then shut the ring down.

# Run from the submission directory; abort instead of running elsewhere.
cd "${PBS_O_WORKDIR}" || exit 1

# PBS_NODEFILE holds one line per allocated CPU slot, so with ppn=2 every
# host name appears twice. Its line count is the number of MPI processes.
# The $(( )) wrapper strips any padding that wc may emit.
NUM_PROCS=$(( $(wc -l < "${PBS_NODEFILE}") ))

# mpdboot starts one mpd daemon per host, so hand it each host only once
# and tell it how many distinct hosts there are. The job id in the file
# name keeps concurrent jobs in the same directory from clobbering it.
MPD_HOSTS="mpd.hosts.${PBS_JOBID}"
sort -u "${PBS_NODEFILE}" > "${MPD_HOSTS}"
NUM_NODES=$(( $(wc -l < "${MPD_HOSTS}") ))

# start MPD on allocated nodes
mpdboot -n "${NUM_NODES}" -f "${MPD_HOSTS}" -r rsh

# run on all allocated CPU slots, remembering the result for the job status
mpiexec -n "${NUM_PROCS}" ./testmpi
MPI_STATUS=$?

# stop MPD on allocated nodes (always, even if the MPI run failed)
mpdallexit
rm -f "${MPD_HOSTS}"

# Report the MPI program's exit status rather than that of the cleanup.
exit "${MPI_STATUS}"
+++ end job +++

+++ torque configuration +++
#
# Create queues and set their attributes.
#
#
# Create and define queue default
#
# "default" is a routing queue: it holds no execution settings of its own
# and only names the execution queues that jobs are passed on to.
create queue default
set queue default queue_type = Route
set queue default max_running = 64
# Destinations are listed from the largest queue to the smallest
# (8, 4, 2, then 1 CPU).
# NOTE(review): Torque is expected to try destinations in the order listed
# and place the job in the first one that accepts it -- confirm in the
# qmgr/queue-attribute documentation before reordering these lines.
set queue default route_destinations = b_8cpus
set queue default route_destinations += b_4cpus
set queue default route_destinations += b_2cpus
set queue default route_destinations += b_1cpu
set queue default enabled = True
set queue default started = True
#
# Create and define queue b_2cpus
#
# Execution queue for 2-CPU jobs; at most 32 of its jobs run at once.
create queue b_2cpus
set queue b_2cpus queue_type = Execution
set queue b_2cpus Priority = 180
set queue b_2cpus max_running = 32
# Lower bounds on a request: at least 2 CPUs and at least 1 node.
# No resources_max is set here, so this block declares no upper bound.
set queue b_2cpus resources_min.ncpus = 2
set queue b_2cpus resources_min.nodect = 1
# Defaults for this queue: 2 CPUs on a single node.
# NOTE(review): ncpus and nodes/nodect are separate resources; a job that
# requests only "nodes=N:ppn=M" may not carry an ncpus value for the
# resources_min.ncpus test above -- verify how such jobs are routed.
set queue b_2cpus resources_default.ncpus = 2
set queue b_2cpus resources_default.nodect = 1
set queue b_2cpus resources_default.nodes = 1
set queue b_2cpus enabled = True
set queue b_2cpus started = True
#
# Create and define queue b_1cpu
#
# Execution queue for single-CPU jobs; at most 64 of its jobs run at once.
create queue b_1cpu
set queue b_1cpu queue_type = Execution
set queue b_1cpu Priority = 200
set queue b_1cpu max_running = 64
# Defaults for this queue: 1 CPU on a single node.
# Unlike the multi-CPU queues in this configuration, no resources_min is
# set, so this block declares no lower bound on a request.
set queue b_1cpu resources_default.ncpus = 1
set queue b_1cpu resources_default.nodect = 1
set queue b_1cpu resources_default.nodes = 1
# Per-user cap of 4 running jobs; this is the only queue in this
# configuration that sets max_user_run.
set queue b_1cpu max_user_run = 4
set queue b_1cpu enabled = True
set queue b_1cpu started = True
#
# Create and define queue b_4cpus
#
# Execution queue for jobs of up to about 4 CPUs; at most 16 of its jobs
# run at once.
create queue b_4cpus
set queue b_4cpus queue_type = Execution
set queue b_4cpus Priority = 160
set queue b_4cpus max_running = 16
# Lower bounds on a request: at least 3 CPUs and at least 2 nodes.
# No resources_max is set here, so this block declares no upper bound.
set queue b_4cpus resources_min.ncpus = 3
set queue b_4cpus resources_min.nodect = 2
# Defaults for this queue: 4 CPUs spread over 2 nodes.
# NOTE(review): ncpus and nodes/nodect are separate resources; a job that
# requests only "nodes=N:ppn=M" may not carry an ncpus value for the
# resources_min.ncpus test above -- verify how such jobs are routed.
set queue b_4cpus resources_default.ncpus = 4
set queue b_4cpus resources_default.nodect = 2
set queue b_4cpus resources_default.nodes = 2
set queue b_4cpus enabled = True
set queue b_4cpus started = True
#
# Create and define queue b_8cpus
#
# Execution queue for the largest jobs (defaults to 8 CPUs); at most 8 of
# its jobs run at once.
create queue b_8cpus
set queue b_8cpus queue_type = Execution
set queue b_8cpus Priority = 140
set queue b_8cpus max_running = 8
# Lower bounds on a request: at least 5 CPUs and at least 3 nodes.
# No resources_max is set here, so this block declares no upper bound.
set queue b_8cpus resources_min.ncpus = 5
set queue b_8cpus resources_min.nodect = 3
# Defaults for this queue: 8 CPUs spread over 4 nodes.
# NOTE(review): ncpus and nodes/nodect are separate resources; a job that
# requests only "nodes=4:ppn=2" may not carry an ncpus value for the
# resources_min.ncpus test above -- verify how such jobs are routed.
set queue b_8cpus resources_default.ncpus = 8
set queue b_8cpus resources_default.nodect = 4
set queue b_8cpus resources_default.nodes = 4
set queue b_8cpus enabled = True
set queue b_8cpus started = True
#
# Set server attributes.
#
# Server-wide settings: scheduling is switched on, host ACLs are off, and
# jobs submitted without a queue name go to the routing queue "default".
set server scheduling = True
set server acl_host_enable = False
# Manager/operator entries use the user@host form. The archived copy of
# this configuration showed them as "root at adm", which is the mailing-list
# archive's e-mail obfuscation and not a valid ACL entry.
set server managers = root@adm
set server operators = root@adm
set server default_queue = default
set server log_events = 511
set server mail_from = pbs
set server query_other_jobs = True
# The cluster is declared as 32 nodes.
set server resources_available.nodect = 32
# Server-wide resource defaults: 1 CPU on 1 node.
# NOTE(review): ncpus and nodes/nodect are separate resources; check how
# the ncpus default interacts with jobs that request only
# "nodes=N:ppn=M" and with the queues' resources_min.ncpus limits.
set server resources_default.ncpus = 1
set server resources_default.nodect = 1
set server resources_default.nodes = 1
# NOTE(review): the three values below are presumably in seconds --
# confirm against the Torque server-attribute documentation.
set server scheduler_iteration = 600
set server node_check_rate = 150
set server tcp_timeout = 6
set server node_pack = False
# NOTE(review): pbs_version looks like a value reported by the server in a
# "print server" dump -- confirm it is settable before replaying this file.
set server pbs_version = 2.1.7
+++ end torque configuration +++



More information about the torqueusers mailing list