[torqueusers] node keeps down'ing itself :-(

Bent Terp Bent.Terp at biosci.ki.se
Wed Mar 9 01:09:27 MST 2005

I have a trivial torque-1.2.0 install (everything on the same host), and 
I am having problems with the node state changing to down. I can force it to free 
with "pbsnodes -c", and it will process a couple of jobs and then mark itself down again.

Any ideas what I'm doing wrong?

Host is FC3 x86_64, and the torque defs are:
[root@gemini torque]# qmgr -c "print server"
# Create queues and set their attributes.
# Create and define queue batch
create queue batch
set queue batch queue_type = Execution
set queue batch resources_default.nodect = 1
set queue batch resources_default.nodes = 1
set queue batch resources_default.walltime = 01:00:00
set queue batch resources_available.nodect = 1
set queue batch resources_available.nodes = 1
set queue batch enabled = True
set queue batch started = True
# Set server attributes.
set server scheduling = True
set server default_queue = batch
set server log_events = 511
set server mail_from = adm
set server resources_available.nodect = 1
set server resources_available.nodes = 1
set server scheduler_iteration = 10
set server node_ping_rate = 300
set server node_check_rate = 600
set server tcp_timeout = 6
set server default_node = gemini.cgb.ki.se#shared
[root@gemini torque]# qmgr -c "print node gemini.cgb.ki.se"
# Create nodes and set their properties.
# Create and define node gemini.cgb.ki.se
# create node gemini.cgb.ki.se  # unsupported operation
set node gemini.cgb.ki.se state = free
set node gemini.cgb.ki.se properties = all
set node gemini.cgb.ki.se ntype = cluster

MVH / With kind regards,
        Bent Terp <Bent.Terp at biosci.ki.se>
        System Administrator
        Bioinformatics and Expression Analysis Core Facility
        Karolinska Institute, Department of Biosciences
        Hälsovägen 7-9, 141 57 Huddinge, Sweden

More information about the torqueusers mailing list