[torqueusers] Routing queue "exceeds queue resource limits"

Aquarijen aquarijen at gmail.com
Thu Nov 16 09:39:36 MST 2006


Hi Torque users,

Once again, I hope you all can help me -- you've been really great!

So I have 2 clusters that I would like to merge into one cluster as
far as torque goes.  They have identical hardware, and they both have
an execution queue with the default name "workq".  They also have other
queues with different names and resource constraints, and with
different acl_users and acl_groups already in place based on secondary
groups with acl_group_sloppy.

So I thought I'd use a routing queue, name it "workq" and have it
route these two groups of users to two new queues that have the
configurations (but not queue name) of the former "workq" queues.
----
What I have:

Cluster "A"
cnms_users submit to "workq" execution queue

Cluster "B"
ccsd_users submit to "workq" execution queue
----

What I want:

Cluster "AB"
cnms_users and ccsd_users submit to "workq" routing queue
Workq routes cnms_users to "cnmsq" execution queue based on acl_group
and acl_group_sloppy
Workq routes ccsd_users to "ccsdq" execution queue based on acl_group
and acl_group_sloppy

----
When I try to do this, however, if I request more than nodes=2:ppn=2
through the "workq" routing queue, qsub rejects the job with:
qsub: Job exceeds queue resource limits
But if I submit directly to the proper execution queue, I have no
problem and the job runs fine as long as I don't request more than
nodes=80:ppn=2.

And I don't understand why.  Perhaps I do not understand the purpose
of a routing queue?  I'll admit that possibility! :P

This is Torque 2.1.5
I also have maui 3.2.6p13
Running on RHEL4
64 bit.

and here's the queue and server config that I am trying to make work
for the combined cluster:
NOTE: the default queue is not the routing queue at present - does
this make a difference?
----------------------------------------------------------
# Create and define routing queue, workq
create queue workq
set queue workq queue_type = Route
set queue workq route_destinations = "cnmsq@b08l02,ccsdq@b08l02,dynaq@b08l02"
set queue workq enabled = True
set queue workq started = True

# Create and define ccsdq
create queue ccsdq
set queue ccsdq queue_type = Execution
set queue ccsdq resources_max.cput = 10000:00:00
set queue ccsdq resources_max.ncpus = 728
set queue ccsdq resources_max.nodect = 364
set queue ccsdq resources_max.walltime = 10000:00:00
set queue ccsdq resources_min.cput = 00:00:01
set queue ccsdq resources_min.ncpus = 1
set queue ccsdq resources_min.nodect = 1
set queue ccsdq resources_min.walltime = 00:00:01
set queue ccsdq resources_default.cput = 10:00:00
set queue ccsdq resources_default.ncpus = 1
set queue ccsdq resources_default.nodect = 1
set queue ccsdq resources_default.walltime = 10:00:00
set queue ccsdq resources_available.nodect = 364
set queue ccsdq acl_hosts = "b03n035,b03n036,b03n037,b03n038 <SNIP, you get the idea>
set queue ccsdq acl_host_enable = false
set queue ccsdq acl_logic_or = true
set queue ccsdq acl_groups = ccsd
set queue ccsdq acl_groups += admin
set queue ccsdq acl_group_sloppy = true
set queue ccsdq acl_group_enable = true
set queue ccsdq enabled = True
set queue ccsdq started = True

# Create and define cnmsq
create queue cnmsq
set queue cnmsq queue_type = Execution
set queue cnmsq resources_max.cput = 10000:00:00
set queue cnmsq resources_max.ncpus = 728
set queue cnmsq resources_max.nodect = 364
set queue cnmsq resources_max.walltime = 10000:00:00
set queue cnmsq resources_min.cput = 00:00:01
set queue cnmsq resources_min.ncpus = 1
set queue cnmsq resources_min.nodect = 1
set queue cnmsq resources_min.walltime = 00:00:01
set queue cnmsq resources_default.cput = 10:00:00
set queue cnmsq resources_default.ncpus = 1
set queue cnmsq resources_default.nodect = 1
set queue cnmsq resources_default.walltime = 10:00:00
set queue cnmsq resources_available.nodect = 364
set queue cnmsq acl_hosts = "b03n035,b03n036,b03n037,<SNIP, you get the idea>
set queue cnmsq acl_host_enable = false
set queue cnmsq acl_logic_or = true
set queue cnmsq acl_groups = cnms
set queue cnmsq acl_groups += admin
set queue cnmsq acl_group_enable = true
set queue cnmsq acl_group_sloppy = true
set queue cnmsq enabled = True
set queue cnmsq started = True

#create and define dynaq for LS-Dyna work
create queue dynaq
set queue dynaq queue_type = Execution
set queue dynaq resources_max.cput = 10000:00:00
set queue dynaq resources_max.ncpus = 80
set queue dynaq resources_max.nodect = 40
set queue dynaq resources_max.nodes = 40
set queue dynaq resources_max.walltime = 10000:00:00
set queue dynaq resources_min.cput = 00:00:01
set queue dynaq resources_min.ncpus = 1
set queue dynaq resources_min.nodect = 1
set queue dynaq resources_min.walltime = 00:00:01
set queue dynaq resources_default.cput = 10:00:00
set queue dynaq resources_default.ncpus = 1
set queue dynaq resources_default.nodect = 1
set queue dynaq resources_default.walltime = 10:00:00
set queue dynaq resources_available.nodect = 40
set queue dynaq acl_hosts = "b05n001,b05n002,b05n003,<SNIP, you get the idea>
set queue dynaq acl_host_enable = false
set queue dynaq acl_users = qs9
set queue dynaq acl_users += 2vt
set queue dynaq acl_users += j3t
set queue dynaq acl_user_enable = true
set queue dynaq acl_logic_or = true
set queue dynaq enabled = True
set queue dynaq started = True
# Set server attributes.
#
set server scheduling = True
set server default_queue = cnmsq
set server log_events = 511
set server mail_from = root
set server query_other_jobs = True
set server resources_available.ncpus = 728
set server resources_available.nodect = 364
set server resources_available.nodes = 364
set server resources_default.neednodes = 1
set server resources_max.ncpus = 728
set server resources_max.nodes = 364
set server scheduler_iteration = 60
set server node_ping_rate = 300
set server node_check_rate = 150
set server tcp_timeout = 6
set server job_stat_rate = 30
set server acl_host_enable = FALSE

------------------------------------------------------------------------------

Thanks as always!!!!!!
Jennifer


More information about the torqueusers mailing list