[torqueusers] node keeps down'ing itself :-(
Bent Terp
Bent.Terp at biosci.ki.se
Wed Mar 9 01:09:27 MST 2005
Hi!
I've got a trivial torque-1.2.0 install (everything on the same host), but
the node's state keeps changing to down. I can force it back to free with
"pbsnodes -c"; it then processes a couple of jobs and marks itself down
again.
Any ideas what I'm doing wrong?
Host is FC3 x86_64, and the torque defs are:
[root@gemini torque]# qmgr -c "print server"
#
# Create queues and set their attributes.
#
#
# Create and define queue batch
#
create queue batch
set queue batch queue_type = Execution
set queue batch resources_default.nodect = 1
set queue batch resources_default.nodes = 1
set queue batch resources_default.walltime = 01:00:00
set queue batch resources_available.nodect = 1
set queue batch resources_available.nodes = 1
set queue batch enabled = True
set queue batch started = True
#
# Set server attributes.
#
set server scheduling = True
set server default_queue = batch
set server log_events = 511
set server mail_from = adm
set server resources_available.nodect = 1
set server resources_available.nodes = 1
set server scheduler_iteration = 10
set server node_ping_rate = 300
set server node_check_rate = 600
set server tcp_timeout = 6
set server default_node = gemini.cgb.ki.se#shared
[root@gemini torque]# qmgr -c "print node gemini.cgb.ki.se"
#
# Create nodes and set their properties.
#
#
# Create and define node gemini.cgb.ki.se
#
# create node gemini.cgb.ki.se # unsupported operation
set node gemini.cgb.ki.se state = free
set node gemini.cgb.ki.se properties = all
set node gemini.cgb.ki.se ntype = cluster
--
MVH / With kind regards,
Bent Terp <Bent.Terp at biosci.ki.se>
System Administrator
Bioinformatics and Expression Analysis Core Facility
Karolinska Institute, Department of Biosciences
Hälsovägen 7-9, 141 57 Huddinge, Sweden
More information about the torqueusers mailing list