[torqueusers] defining queues by user defined node features

P Spencer Davis psdavis at bsu.edu
Fri Sep 14 13:47:43 MDT 2007


Hello,
   I'm running v 2.1.6 of PBS as a resource manager with v 3.2.6p19 of 
the Maui scheduler. All the compute nodes are running RHEL 4 with the 
2.6.9-55 kernel. The cluster is heterogeneous: 32 of the nodes are 32-bit 
dual processor, and the other 32 are 64-bit dual processor. The nodes 
file in server_priv is configured as follows (edited for brevity)
...
n31 np=2 x86
n32 np=2 x86-64
...

with the idea being that submitting a job with nodes=x86-64 will select 
a 64 bit node. This worked fine until I created a routing queue with a 
short and a long execution queue; now the jobs are routed in a haphazard 
way. I tried creating short and long queues with the following properties:
Queue short-64
         queue_type = Execution
         total_jobs = 0
         state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 
Exiting:0
         resources_max.walltime = 24:00:00
         resources_default.neednodes = x86-64
         resources_default.nodes = x86-64
         mtime = Fri Sep 14 14:25:56 2007
         enabled = True
         started = True
and they work fine as long as I submit jobs directly to them, but if the 
job is submitted to the default routing queue, it will only be routed by 
cpu or walltime.
                    Any insight is appreciated,
                                    Spencer
Here are my queue definitions:
Queue short
         queue_type = Execution
         Priority = 20
         max_queuable = 62
         total_jobs = 4
         state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:4 
Exiting:0
         from_route_only = True
         resources_max.cput = 24:00:00
         resources_max.walltime = 24:00:00
         resources_min.cput = 00:00:00
         resources_default.neednodes = x86
         resources_default.nodes = x86
         mtime = Fri Sep 14 14:27:28 2007
         resources_assigned.mem = 16777216b
         resources_assigned.nodect = 4
         enabled = True
         started = True

Queue routing
         queue_type = Route
         total_jobs = 0
         state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 
Exiting:0
         resources_default.walltime = 00:10:00
         mtime = Fri Sep 14 14:06:20 2007
         route_destinations = short,long,long-64,short-64
         route_held_jobs = True
         route_waiting_jobs = True
         route_retry_time = 120
         route_lifetime = 604800
         enabled = True
         started = True

Queue long-64
         queue_type = Execution
         total_jobs = 0
         state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 
Exiting:0
         resources_min.walltime = 24:00:00
         resources_default.neednodes = x86-64
         mtime = Fri Sep 14 14:42:06 2007
         enabled = True
         started = True

Queue bsu-research
         queue_type = Execution
         Priority = 80
         total_jobs = 0
         state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 
Exiting:0
         from_route_only = False
         acl_group_enable = True
         acl_groups = ccnstaff
         mtime = Tue Aug 21 12:34:26 2007
         enabled = True
         started = True

Queue long
         queue_type = Execution
         Priority = 20
         max_queuable = 62
         total_jobs = 0
         state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 
Exiting:0
         acl_host_enable = False
         from_route_only = True
         resources_min.cput = 24:00:01
         resources_min.walltime = 24:00:01
         resources_default.neednodes = x86
         mtime = Fri Sep 14 14:01:39 2007
         resources_assigned.mem = 0b
         resources_assigned.nodect = 0
         enabled = True
         started = True

Queue short-64
         queue_type = Execution
         total_jobs = 0
         state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:0 
Exiting:0
         resources_max.walltime = 24:00:00
         resources_default.neednodes = x86-64
         resources_default.nodes = x86-64
         mtime = Fri Sep 14 14:25:56 2007
         enabled = True
         started = True

My server configuration:
Server ccncluster.bsu.edu
         server_state = Active
         scheduling = True
         total_jobs = 4
         state_count = Transit:0 Queued:0 Held:0 Waiting:0 Running:4 
Exiting:0
         managers =
         operators =
         default_queue = routing
         log_events = 511
         mail_from = adm
         resources_default.mem = 4mb
         resources_assigned.mem = 16777216b
         resources_assigned.nodect = 4
         scheduler_iteration = 600
         node_check_rate = 150
         tcp_timeout = 6
         node_pack = False
         pbs_version = 2.1.6


and the maui configuration:

# maui.cfg 3.2.6p19

SERVERHOST            somehost.nowhere.net
# primary admin must be first in list
ADMIN1               00notreal

# Resource Manager Definition

#RMCFG[SOMEHOST] TYPE=PBS@RMNMHOST@
RMCFG[base]   TYPE=PBS

# Allocation Manager Definition

AMCFG[bank]  TYPE=NONE

# full parameter docs at http://supercluster.org/mauidocs/a.fparameters.html
# use the 'schedctl -l' command to display current configuration

RMPOLLINTERVAL        00:00:30

SERVERPORT            42559
SERVERMODE            NORMAL

# Admin: http://supercluster.org/mauidocs/a.esecurity.html


LOGFILE               maui.log
LOGFILEMAXSIZE        10000000
LOGLEVEL              3

# Job Priority: http://supercluster.org/mauidocs/5.1jobprioritization.html

QUEUETIMEWEIGHT       1

# FairShare: http://supercluster.org/mauidocs/6.3fairshare.html

#FSPOLICY              PSDEDICATED
#FSDEPTH               7
#FSINTERVAL            86400
#FSDECAY               0.80

# Throttling Policies: http://supercluster.org/mauidocs/6.2throttlingpolicies.html

# NONE SPECIFIED

# Backfill: http://supercluster.org/mauidocs/8.2backfill.html

BACKFILLPOLICY        NONE
RESERVATIONPOLICY     CURRENTHIGHEST

# Maui Feature polices

ENABLEMULTIREQJOBS TRUE

# Node Allocation: http://supercluster.org/mauidocs/5.2nodeallocation.html

NODEALLOCATIONPOLICY MINRESOURCE

# QOS: http://supercluster.org/mauidocs/7.3qos.html

# QOSCFG[hi]  PRIORITY=100 XFTARGET=100 FLAGS=PREEMPTOR:IGNMAXJOB
# QOSCFG[low] PRIORITY=-1000 FLAGS=PREEMPTEE

# Standing Reservations: http://supercluster.org/mauidocs/7.1.3standingreservations.html

# SRSTARTTIME[test] 8:00:00
# SRENDTIME[test]   17:00:00
# SRDAYS[test]      MON TUE WED THU FRI
# SRTASKCOUNT[test] 20
# SRMAXTIME[test]   0:30:00

# Creds: http://supercluster.org/mauidocs/6.1fairnessoverview.html

# USERCFG[DEFAULT]      FSTARGET=25.0
# USERCFG[john]         PRIORITY=100  FSTARGET=10.0-
# GROUPCFG[staff]       PRIORITY=1000 QLIST=hi:low QDEF=hi
# CLASSCFG[batch]       FLAGS=PREEMPTEE
# CLASSCFG[interactive] FLAGS=PREEMPTOR




More information about the torqueusers mailing list