[torqueusers] Associating a queue with specific nodes

Thomas H Dr Pierce TPierce at rohmhaas.com
Sat Dec 30 13:29:46 MST 2006


Dear Torque Users,

I am trying to do some simple batch management with Torque.

I want to have two queues that execute jobs on different, specific
nodes. I also think it is a good idea to have a default queue as well.

So I want a queue called "fast" that runs on node07 or node08, which are
the fast nodes. I set those two nodes to have the property d1950, and
tried to require that property in the "fast" queue.
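
My understanding (please correct me if I have this wrong) is that the same
property can also be requested explicitly at submit time, so a job aimed
at the fast nodes would be submitted something like this, where test.sh is
just a placeholder script name:

    qsub -q fast -l nodes=1:d1950 test.sh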

I want a queue called "medium" that executes jobs on node53 and node54,
which are medium-speed nodes. I set those two nodes to have the property
d1850, and tried to set the "medium" queue to require that property.

Eventually I would add an Engineering queue to run on four other nodes
that are licensed for engineering software, but I thought I should have
the "fast" and "medium" queues working before I tried that.

And, of course, it does not work. Everything just runs on node07, the
first node in the nodes list. I cannot get any job to run on node53 or
node54; all queues execute their jobs on node07.

How can I partition the cluster so that node07 and node08 are assigned to
one queue, and node53 and node54 to a different queue?
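
For example, I would expect a plain submission to the medium queue, such
as (test.sh again being a placeholder script name):

    qsub -q medium test.sh

to pick up neednodes = d1850 from the queue's resources_default and land
on node53 or node54. Instead, as I said, it runs on node07 like
everything else.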

All help is appreciated. Thanks. 

Background data:
The daemons running are:

*********************************************************
******************  The Silvio Cluster  *****************
**   This Node is Master node for the Silvio Cluster   **
*********************************************************
[root@silvio ~]# ps aux | grep pbs
root      1215  0.0  0.0  3812 1348 ?        Ss   Dec19   0:13 ./pbs_server
root     29220  0.0  0.0  4516 1040 ?        Ss   Dec24   5:51 pbs_mom
root     29392  0.0  0.0  3760  828 ?        Ss   Dec26   0:01 ./pbs_sched
root      6173  0.0  0.0  4012  680 pts/1    S+   15:09   0:00 grep pbs

Running pbs_mom here as well, since silvio is the "default" node.

The queues are:
[root@silvio server_priv]# qmgr -c "p s"
#
# Create queues and set their attributes.
#
#
# Create and define queue batch
#
create queue batch
set queue batch queue_type = Execution
set queue batch resources_default.nodes = 1
set queue batch resources_default.walltime = 01:00:00
set queue batch enabled = True
set queue batch started = True
#
# Create and define queue fast
#
create queue fast
set queue fast queue_type = Execution
set queue fast Priority = 40
set queue fast max_running = 10
set queue fast resources_default.neednodes = d1950
set queue fast resources_default.nodes = 1
set queue fast enabled = True
set queue fast started = True
#
# Create and define queue medium
#
create queue medium
set queue medium queue_type = Execution
set queue medium Priority = 80
set queue medium max_running = 10
set queue medium resources_default.cput = 02:00:00
set queue medium resources_default.neednodes = d1850
set queue medium resources_default.nodes = 1
set queue medium enabled = True
set queue medium started = True
#
# Set server attributes.
#
set server scheduling = True
set server default_queue = batch
set server log_events = 511
set server mail_from = adm
set server scheduler_iteration = 600
set server node_check_rate = 150
set server tcp_timeout = 6
set server pbs_version = 2.1.6
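
For what it is worth, my understanding is that running qstat -f on a job
should show whether the queue's neednodes default actually got applied,
and where the job ran, along these lines (1234 is a made-up job id):

    qstat -f 1234 | grep -E "neednodes|exec_host"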


The file /var/spool/torque/server_priv/nodes is:
[root@silvio server_priv]# cat nodes
node07 np=4 d1950
node08 np=2 d1950
node53 np=2 d1850
node54 np=2 d1850
silvio:ts np=2
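
As I understand the nodes file format, each line is

    <hostname>[:ts] np=<slots> [<property> ...]

so the bare words d1950 and d1850 are node properties, and the :ts suffix
marks silvio as time-shared.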

[root@silvio ~]# pbsnodes -a
node07
     state = free
     np = 4
     properties = d1950
     ntype = cluster
     status = opsys=linux,uname=Linux node07 2.6.9-42.ELsmp #1 SMP Wed Jul 
12 23:32:02 EDT 2006 
x86_64,sessions=4706,nsessions=1,nusers=1,idletime=1317790,totmem=6006972kb,availmem=5838656kb,physmem=4040900kb,ncpus=4,loadave=0.00,netload=3175545786,state=free,jobs=? 
15201,rectime=1167509354

node08
     state = free
     np = 2
     properties = d1950
     ntype = cluster
     status = opsys=linux,uname=Linux node08 2.6.9-42.ELsmp #1 SMP Wed Jul 
12 23:32:02 EDT 2006 
x86_64,sessions=4393,nsessions=1,nusers=1,idletime=1318754,totmem=6006972kb,availmem=5844760kb,physmem=4040900kb,ncpus=4,loadave=0.00,netload=371279658,state=free,jobs=? 
15201,rectime=1167509343

node53
     state = free
     np = 2
     properties = d1850
     ntype = cluster
     status = opsys=linux,uname=Linux node53 2.6.9-42.ELsmp #1 SMP Wed Jul 
12 23:27:17 EDT 2006 
i686,sessions=4275,nsessions=1,nusers=1,idletime=5526907,totmem=5146664kb,availmem=4983680kb,physmem=3115056kb,ncpus=4,loadave=0.00,netload=1644344957,state=free,jobs=? 
15201,rectime=1167509379

node54
     state = free
     np = 2
     properties = d1850
     ntype = cluster
     status = opsys=linux,uname=Linux node54 2.6.9-42.ELsmp #1 SMP Wed Jul 
12 23:27:17 EDT 2006 
i686,sessions=4299,nsessions=1,nusers=1,idletime=5281965,totmem=8148984kb,availmem=7901824kb,physmem=4151296kb,ncpus=4,loadave=0.00,netload=1644763048,state=free,jobs=? 
15201,rectime=1167509385

silvio
     state = free
     np = 2
     ntype = time-shared
     status = opsys=linux,uname=Linux silvio 2.6.9-42.0.3.ELsmp #1 SMP Mon 
Sep 25 17:28:02 EDT 2006 i686,sessions=6296 2443 7715 7723 7751 7790 7845 
548 22516 32663 4598 5163 5555 29978 15490 4349 9053 8931 20405 5217 3160 
8935 971 2070 21591 7156 31248 
22333,nsessions=28,nusers=10,idletime=25171,totmem=8209744kb,availmem=7660184kb,physmem=8307556kb,ncpus=4,loadave=1.16,netload=2638619773,state=free,jobs=? 
15201,rectime=1167509371


 ------
Sincerely,

   Tom Pierce
    Bldg 7/ Rm 207D - Spring House, PA