[torqueusers] no cpus available

Albino Aveleda bino at coc.ufrj.br
Wed Dec 13 04:29:12 MST 2006


Hi,

I had the same problem, but on a cluster with 32 CPUs. I fixed it by
increasing resources_max.ncpus and setting resources_min.ncpus.
Once I did that, all jobs ran in the first queue listed in the route
queue. To fix this, I changed the order of the route queue's destinations,
i.e., queues with more CPUs come first. Below, I show some of the queues so
you can see how I did this.

#
# Create and define queue b_1cpu
#
create queue b_1cpu
set queue b_1cpu queue_type = Execution
set queue b_1cpu Priority = 200
set queue b_1cpu max_running = 32
set queue b_1cpu resources_max.nodes = 1
set queue b_1cpu resources_default.ncpus = 1
set queue b_1cpu resources_default.nodect = 1
set queue b_1cpu resources_default.nodes = 1
set queue b_1cpu max_user_run = 4
set queue b_1cpu enabled = True
set queue b_1cpu started = True
#
# Create and define queue b_2cpus
#
create queue b_2cpus
set queue b_2cpus queue_type = Execution
set queue b_2cpus Priority = 180
set queue b_2cpus max_running = 16
set queue b_2cpus resources_max.ncpus = 32
set queue b_2cpus resources_max.nodes = 2
set queue b_2cpus resources_min.ncpus = 2
set queue b_2cpus resources_min.nodect = 2
set queue b_2cpus resources_default.ncpus = 2
set queue b_2cpus resources_default.nodect = 2
set queue b_2cpus resources_default.nodes = 2
set queue b_2cpus enabled = True
set queue b_2cpus started = True

...

#
# Create and define queue default
#
create queue default
set queue default queue_type = Route
set queue default max_running = 32
set queue default route_destinations += b_16cpus
set queue default route_destinations += b_8cpus
set queue default route_destinations += b_4cpus
set queue default route_destinations += b_2cpus
set queue default route_destinations += b_1cpu
set queue default enabled = True
set queue default started = True

[]´s,
Bibo

Quoting Yiannis Georgiadis <giannis at cc.uoa.gr>:

>
> Hi,
> I have two SMP machines
> my nodes file:
>
> aegean np=24
> uranus np=48
>
> but only a few jobs get to run;
> many times the jobs just stay queued,
> with this message in the pbs_sched log:
>           Not enough cpus available
>
> here is my configuration ..
>
> #
> #
> create queue router
> set queue router queue_type = Route
> set queue router route_destinations = medium
> set queue router enabled = True
> set queue router started = True
> #
> # Create and define queue medium
> #
> create queue medium
> set queue medium queue_type = Execution
> set queue medium max_queuable = 2
> set queue medium max_user_queuable = 1
> set queue medium max_running = 16
> set queue medium from_route_only = False
> set queue medium resources_max.ncpus = 1
> set queue medium resources_max.nodect = 1
> set queue medium resources_min.ncpus = 1
> set queue medium resources_min.nodect = 1
> set queue medium resources_default.ncpus = 1
> set queue medium resources_default.walltime = 48:00:00
> set queue medium max_user_run = 4
> set queue medium enabled = True
> set queue medium started = True
>
> create queue aheavy
> set queue aheavy queue_type = Execution
> set queue aheavy max_queuable = 2
> set queue aheavy max_user_queuable = 1
> set queue aheavy max_running = 8
> set queue aheavy resources_max.ncpus = 1
> set queue aheavy resources_min.ncpus = 1
> set queue aheavy resources_default.ncpus = 1
> set queue aheavy resources_default.nodes = aegean
> set queue aheavy resources_default.walltime = 240:00:00
> set queue aheavy max_user_run = 4
> set queue aheavy enabled = True
> set queue aheavy started = True
>
> create queue uparal-1
> set queue uparal-1 queue_type = Execution
> set queue uparal-1 max_queuable = 2
> set queue uparal-1 max_user_queuable = 1
> set queue uparal-1 max_running = 2
> set queue uparal-1 resources_max.ncpus = 16
> set queue uparal-1 resources_default.nodes = uranus
> set queue uparal-1 resources_default.walltime = 240:00:00
> set queue uparal-1 max_user_run = 1
> set queue uparal-1 enabled = True
> set queue uparal-1 started = True
>
>
> create queue uparal-2
> set queue uparal-2 queue_type = Execution
> set queue uparal-2 max_queuable = 2
> set queue uparal-2 max_user_queuable = 1
> set queue uparal-2 max_running = 6
> set queue uparal-2 resources_max.ncpus = 8
> set queue uparal-2 resources_min.ncpus = 2
> set queue uparal-2 resources_default.ncpus = 8
> set queue uparal-2 resources_default.nodes = uranus
> set queue uparal-2 resources_default.walltime = 240:00:00
> set queue uparal-2 max_user_run = 2
> set queue uparal-2 enabled = True
>
> create queue uheavy
> set queue uheavy queue_type = Execution
> set queue uheavy max_queuable = 2
> set queue uheavy max_user_queuable = 1
> set queue uheavy max_running = 8
> set queue uheavy resources_max.ncpus = 1
> set queue uheavy resources_default.ncpus = 1
> set queue uheavy resources_default.nodes = uranus
> set queue uheavy resources_default.walltime = 240:00:00
> set queue uheavy max_user_run = 4
> set queue uheavy enabled = True
> set queue uheavy started = True
>
> create queue aparal
> set queue aparal queue_type = Execution
> set queue aparal max_queuable = 2
> set queue aparal max_user_queuable = 1
> set queue aparal max_running = 4
> set queue aparal resources_max.ncpus = 8
> set queue aparal resources_min.ncpus = 2
> set queue aparal resources_default.ncpus = 8
> set queue aparal resources_default.nodes = aegean
> set queue aparal resources_default.walltime = 240:00:00
> set queue aparal max_user_run = 2
> set queue aparal enabled = True
> set queue aparal started = True
>
> set queue parallel-u2 queue_type = Execution
> set queue parallel-u2 max_running = 6
> set queue parallel-u2 resources_max.nodect = 1
> set queue parallel-u2 resources_min.ncpus = 2
> set queue parallel-u2 resources_default.ncpus = 8
> set queue parallel-u2 resources_default.nodect = 1
> set queue parallel-u2 resources_default.nodes = uranus
> set queue parallel-u2 resources_default.walltime = 240:00:00
> set queue parallel-u2 max_user_run = 2
> set queue parallel-u2 enabled = False
> set queue parallel-u2 started = True
>
> set server scheduling = True
> set server max_running = 24
> set server max_user_run = 8
> set server acl_host_enable = False
> set server acl_hosts = *.cc.uoa.gr
> set server acl_user_enable = False
> set server managers = giannis at erato
> set server operators = giannis at erato
> set server default_queue = medium
> set server log_events = 511
> set server mail_from = adm
> set server query_other_jobs = True
> set server scheduler_iteration = 600
> set server node_check_rate = 150
> set server tcp_timeout = 15
> set server log_level = 4
> set server pbs_version = 2.1.6
>
> A few jobs are running now, but
> why doesn't pbs_sched see the free CPUs?
>
> Any idea ?
>
>
> Yiannis Georgiadis
>
> Computer Center -Univ of Athens
>
>


More information about the torqueusers mailing list