[torqueusers] three queues work, one queue just disappears jobs

Thomas H Dr Pierce TPierce at rohmhaas.com
Wed Sep 10 13:11:45 MDT 2008


Dear Torque/Maui Users,

Too weird.

I am very puzzled. I have three queues that work: the "fast", "quad" and 
"fluent" queues.  The "medium" queue does not. Jobs submitted to it just 
disappear. 
Any suggestions on my queue definitions or how to discover why the jobs do 
not actually run?

These jobs never write any output; they just disappear. They create a 
*.o1875 file and an *.e1875 file, both with zero bytes written.
I can run tracejob on them and this is what I get:

 tracejob 1875
 
/var/spool/torque/mom_logs/20080910: No matching job records located
/var/spool/torque/sched_logs/20080910: No such file or directory

Job: 1875.ralphie

09/10/2008 14:08:36  S    enqueuing into medium, state 1 hop 1
09/10/2008 14:08:36  S    Job Queued at request of  thp at ralphie, owner =
                          thp at ralphie, job name = med.para_Jaguar,
                          queue = medium
09/10/2008 14:08:37  S    Job Modified at request of root at ralphie
09/10/2008 14:08:37  S    Job Run at request of root at ralphie
09/10/2008 14:08:37  S    Job Modified at request of root at ralphie
09/10/2008 14:08:37  S    Exit_status=1 resources_used.cput=00:00:00
                          resources_used.mem=0kb resources_used.vmem=0kb
                          resources_used.walltime=00:00:00
09/10/2008 14:08:37  S    dequeuing from medium, state COMPLETE

Here are my qmgr server and queue definitions:

$ qmgr -c "p s"
#
# Create queues and set their attributes.
#
#
# Create and define queue fast
#
create queue fast
set queue fast queue_type = Execution
set queue fast Priority = 40
set queue fast max_running = 64
set queue fast acl_host_enable = False
set queue fast acl_hosts = node19
set queue fast acl_hosts += node10
set queue fast resources_default.nodes = 1
set queue fast resources_available.nodect = 64
set queue fast enabled = True
set queue fast started = True
#
# Create and define queue medium
#
create queue medium
set queue medium queue_type = Execution
set queue medium Priority = 40
set queue medium max_running = 10
set queue medium acl_host_enable = False
set queue medium acl_hosts = node49
set queue medium acl_hosts += node48
set queue medium acl_hosts += node47
set queue medium acl_hosts += node46
set queue medium acl_hosts += node06
set queue medium acl_hosts += node45
set queue medium acl_hosts += node05
set queue medium acl_hosts += node54
set queue medium acl_hosts += node44
set queue medium acl_hosts += node53
set queue medium acl_hosts += node43
set queue medium acl_hosts += node52
set queue medium acl_hosts += node42
set queue medium acl_hosts += node51
set queue medium acl_hosts += node41
set queue medium acl_hosts += node50
set queue medium resources_default.nodes = 1
set queue medium resources_available.nodect = 40
set queue medium enabled = True
set queue medium started = True
#
# Create and define queue batch
#
create queue batch
set queue batch queue_type = Execution
set queue batch max_running = 4
set queue batch resources_default.nodes = 1
set queue batch resources_available.nodect = 2048
set queue batch enabled = True
set queue batch started = True
#
# Create and define queue fluent
#
create queue fluent
set queue fluent queue_type = Execution
set queue fluent Priority = 50
set queue fluent max_running = 25
set queue fluent acl_host_enable = False
set queue fluent acl_hosts = node26
set queue fluent acl_hosts += node25
set queue fluent resources_default.nodes = 1
set queue fluent resources_available.nodect = 30
set queue fluent enabled = True
set queue fluent started = True
#
# Create and define queue quad
#
create queue quad
set queue quad queue_type = Execution
set queue quad Priority = 40
set queue quad max_running = 40
set queue quad acl_host_enable = False
set queue quad acl_hosts = node29
set queue quad acl_hosts += node28
set queue quad resources_default.nodes = 1
set queue quad resources_available.nodect = 104
set queue quad enabled = True
set queue quad started = True
#
# Set server attributes.
#
set server scheduling = True
set server operators = root at ralphie
set server operators += rs0thp at ralphie
set server default_queue = batch
set server log_events = 511
set server mail_from = adm
set server query_other_jobs = True
set server resources_available.nodect = 2048
set server resources_default.nodes = 1
set server scheduler_iteration = 600
set server node_check_rate = 150
set server tcp_timeout = 6
set server pbs_version = 2.1.10

------
Sincerely,

   Tom Pierce
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.supercluster.org/pipermail/torqueusers/attachments/20080910/7f8e21e3/attachment.html


More information about the torqueusers mailing list