[torqueusers] single node / multi-node separation

Garrick Staples garrick at usc.edu
Tue Jan 10 11:48:12 MST 2006


On Thu, Jan 05, 2006 at 06:55:03PM -0600, Jacques Normand alleged:
> Hi,
> 
> I also have problems with the separation of single-node and multi-node
> jobs on my Opteron cluster. Contrary to what I said in the first email
> (which never arrived, don't ask...), the default configuration does not
> allow multi-node jobs. The problems are the same in both cases.
> 
> echo "sleep 30 ; env" | qsub -q maintenance
> 51.janeway.rice.edu
> 
> echo "sleep 30 ; env" | qsub -l "walltime=00:01:00" -q maintenance
> 52.janeway.rice.edu
> 
> echo "sleep 30 ; env" | qsub -l "nodes=1,walltime=00:01:00" -q maintenance
> qsub: Job exceeds queue resource limits
> 
> I don't know what goes wrong in the configuration though. 
> 
> The configuration is attached, and I run 2.0.0p5-snap-1135358596 as well
> as 2.0.0p5-snap-1136437962 (due to a 64-bit problem with qrerun
> mentioned on torquedev).
> 
> thanks 
> 
> jacques

The output below doesn't look like output from qmgr.  Can you send the
output of 'qmgr -c "print server"'?



> #
> # cleanup
> #
> delete queue default
> delete queue maintenance
> delete queue singlenode
> delete queue linda
> 
> #
> # Create queues and set their attributes.
> #
> #
> # Create and define queue maintenance
> #
> create queue maintenance
> set queue maintenance queue_type = Execution
> set queue maintenance Priority = 1024
> set queue maintenance acl_group_enable = True
> set queue maintenance acl_groups = admins
> set queue maintenance acl_user_enable = True
> set queue maintenance acl_users = root
> set queue maintenance acl_logic_or = True
> #set queue maintenance resources_max.nodect = 4 
> #set queue maintenance resources_max.walltime = 4800:00:00
> set queue maintenance resources_default.nodect = 1 
> set queue maintenance resources_default.walltime = 00:20:00
> set queue maintenance from_route_only = False
> #set queue maintenance max_running = 4
> set queue maintenance enabled = True
> set queue maintenance started = True
> #
> # Create and define queue singlenode
> #
> create queue singlenode
> set queue singlenode queue_type = Execution
> set queue singlenode acl_group_enable = False
> set queue singlenode Priority = 0
> set queue singlenode from_route_only = True
> set queue singlenode resources_max.nodect = 1
> set queue singlenode resources_max.walltime = 4800:00:00
> set queue singlenode enabled = True
> set queue singlenode started = True
> #
> # Create and define queue linda
> #
> create queue linda
> set queue linda queue_type = Execution
> set queue linda acl_group_enable = False
> set queue linda Priority = 0
> set queue linda from_route_only = True
> set queue linda resources_max.walltime = 4800:00:00
> set queue linda resources_min.nodes = 2
> set queue linda enabled = True
> set queue linda started = True
> #
> # Create and define queue default
> #
> create queue default
> set queue default queue_type = Route
> set queue default route_destinations = singlenode
> set queue default route_destinations += linda
> set queue default route_held_jobs = True
> set queue default route_waiting_jobs = True
> set queue default route_retry_time = 30
> set queue default route_lifetime = 3600
> set queue default resources_default.nodect = 1
> set queue default enabled = True
> set queue default started = True
> #
> # Set server attributes.
> #
> set server scheduling = False
> set server acl_roots = root
> set server default_queue = default
> set server log_events = 511
> set server query_other_jobs = True
> set server scheduler_iteration = 600
> set server node_pack = True
> 




> _______________________________________________
> torqueusers mailing list
> torqueusers at supercluster.org
> http://www.supercluster.org/mailman/listinfo/torqueusers


-- 
Garrick Staples, Linux/HPCC Administrator
University of Southern California
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
Url : http://www.supercluster.org/pipermail/torqueusers/attachments/20060110/3211758e/attachment.bin


More information about the torqueusers mailing list