[torqueusers] Associating a queue with specific nodes
Thomas H Dr Pierce
TPierce at rohmhaas.com
Sat Dec 30 13:29:46 MST 2006
Dear Torque Users,
I think I am trying to do simple batch management with Torque.
I want to have two queues that execute jobs on different specific
nodes. Then I think it is a good idea to have a default queue as well.
So I want to have a queue called "fast" that runs on node07 or node08 ,
which are the fast nodes.... So I set two nodes to have a property =
d1950, and tried to require that property in the "fast" queue.
I want to have a queue called "medium" that executes jobs on node53 and
node54, which are medium speed nodes. So I set two nodes to have the
property= d1850, and tried to set the "medium" queue to require that
property.
Eventually I would add an Engineering queue to run on 4 other separate
nodes that are licensed for engineering software, but I thought I should
have the "fast" and "medium" queue working before I tried that.
And, of course, it does not work.... Everything just runs on node07, the
first node in the nodes list. I cannot get any job to be submitted to the
node53 or node54, and all queues execute jobs on node07.
How can I partition the cluster out so I can assign node07 and node08 to a
specific queue, and node53 and node54 to a different queue?
All help is appreciated. Thanks.
background data:
the Daemons running are:
*********************************************************
****************** The Silvio Cluster *****************
** This Node is Master node for the Silvio Cluster **
*********************************************************
[root at silvio ~]# ps aux |grep pbs
root 1215 0.0 0.0 3812 1348 ? Ss Dec19 0:13
./pbs_server
root 29220 0.0 0.0 4516 1040 ? Ss Dec24 5:51 pbs_mom
root 29392 0.0 0.0 3760 828 ? Ss Dec26 0:01 ./pbs_sched
root 6173 0.0 0.0 4012 680 pts/1 S+ 15:09 0:00 grep pbs
Running pbs_mom here since silvio is the "default node..."
The queues are:
[root at silvio server_priv]# qmgr -c "p s"
#
# Create queues and set their attributes.
#
#
# Create and define queue batch
#
create queue batch
set queue batch queue_type = Execution
set queue batch resources_default.nodes = 1
set queue batch resources_default.walltime = 01:00:00
set queue batch enabled = True
set queue batch started = True
#
# Create and define queue fast
#
create queue fast
set queue fast queue_type = Execution
set queue fast Priority = 40
set queue fast max_running = 10
set queue fast resources_default.neednodes = d1950
set queue fast resources_default.nodes = 1
set queue fast enabled = True
set queue fast started = True
#
# Create and define queue medium
#
create queue medium
set queue medium queue_type = Execution
set queue medium Priority = 80
set queue medium max_running = 10
set queue medium resources_default.cput = 02:00:00
set queue medium resources_default.neednodes = d1850
set queue medium resources_default.nodes = 1
set queue medium enabled = True
set queue medium started = True
#
# Set server attributes.
#
set server scheduling = True
set server default_queue = batch
set server log_events = 511
set server mail_from = adm
set server scheduler_iteration = 600
set server node_check_rate = 150
set server tcp_timeout = 6
set server pbs_version = 2.1.6
The file /var/spool/torque/server_priv/nodes is:
[root at silvio server_priv]# cat nodes
node07 np=4 d1950
node08 np=2 d1950
node53 np=2 d1850
node54 np=2 d1850
silvio:ts np=2
[root at silvio ~]# pbsnodes -a
node07
state = free
np = 4
properties = d1950
ntype = cluster
status = opsys=linux,uname=Linux node07 2.6.9-42.ELsmp #1 SMP Wed Jul
12 23:32:02 EDT 2006
x86_64,sessions=4706,nsessions=1,nusers=1,idletime=1317790,totmem=6006972kb,availmem=5838656kb,physmem=4040900kb,ncpus=4,loadave=0.00,netload=3175545786,state=free,jobs=?
15201,rectime=1167509354
node08
state = free
np = 2
properties = d1950
ntype = cluster
status = opsys=linux,uname=Linux node08 2.6.9-42.ELsmp #1 SMP Wed Jul
12 23:32:02 EDT 2006
x86_64,sessions=4393,nsessions=1,nusers=1,idletime=1318754,totmem=6006972kb,availmem=5844760kb,physmem=4040900kb,ncpus=4,loadave=0.00,netload=371279658,state=free,jobs=?
15201,rectime=1167509343
node53
state = free
np = 2
properties = d1850
ntype = cluster
status = opsys=linux,uname=Linux node53 2.6.9-42.ELsmp #1 SMP Wed Jul
12 23:27:17 EDT 2006
i686,sessions=4275,nsessions=1,nusers=1,idletime=5526907,totmem=5146664kb,availmem=4983680kb,physmem=3115056kb,ncpus=4,loadave=0.00,netload=1644344957,state=free,jobs=?
15201,rectime=1167509379
node54
state = free
np = 2
properties = d1850
ntype = cluster
status = opsys=linux,uname=Linux node54 2.6.9-42.ELsmp #1 SMP Wed Jul
12 23:27:17 EDT 2006
i686,sessions=4299,nsessions=1,nusers=1,idletime=5281965,totmem=8148984kb,availmem=7901824kb,physmem=4151296kb,ncpus=4,loadave=0.00,netload=1644763048,state=free,jobs=?
15201,rectime=1167509385
silvio
state = free
np = 2
ntype = time-shared
status = opsys=linux,uname=Linux silvio 2.6.9-42.0.3.ELsmp #1 SMP Mon
Sep 25 17:28:02 EDT 2006 i686,sessions=6296 2443 7715 7723 7751 7790 7845
548 22516 32663 4598 5163 5555 29978 15490 4349 9053 8931 20405 5217 3160
8935 971 2070 21591 7156 31248
22333,nsessions=28,nusers=10,idletime=25171,totmem=8209744kb,availmem=7660184kb,physmem=8307556kb,ncpus=4,loadave=1.16,netload=2638619773,state=free,jobs=?
15201,rectime=1167509371
------
Sincerely,
Tom Pierce
Bldg 7/ Rm 207D - Spring House, PA
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.supercluster.org/pipermail/torqueusers/attachments/20061230/8da50954/attachment.html
More information about the torqueusers
mailing list