[torqueusers] 2.5-beta another one

Stuart Barkley stuartb at 4gh.net
Fri Jul 23 08:53:52 MDT 2010


Sorry.  Another problem with 2.5 beta and arrays.

User types (incorrectly -- a resource list is given to -t, the job array
option, presumably where -l was intended):

% qsub -t nodes=128:ppn=8 run_parallel.sh
qsub: End of File
%

pbs_server then dies with a glibc "double free or corruption" abort:

# pbs_server -D
pbs_server is up
*** glibc detected *** pbs_server: double free or corruption (!prev): 0x0000000019245e60 ***
======= Backtrace: =========
/lib64/libc.so.6[0x32000722ef]
/lib64/libc.so.6(cfree+0x4b)[0x320007273b]
pbs_server[0x40bdda]
pbs_server[0x407d1a]
pbs_server[0x40853a]
pbs_server[0x423302]
pbs_server[0x41944d]
pbs_server[0x419c66]
/usr/local/lib/libtorque.so.2(wait_request+0x2ce)[0x2b0afb02bdee]
pbs_server[0x417fcc]
pbs_server[0x418955]
/lib64/libc.so.6(__libc_start_main+0xf4)[0x320001d994]
pbs_server[0x4065f9]
======= Memory map: ========
00400000-0044a000 r-xp 00000000 08:01 14793221                           /usr/local/sbin/pbs_server
0064a000-00650000 rw-p 0004a000 08:01 14793221                           /usr/local/sbin/pbs_server
00650000-01198000 rw-p 00650000 00:00 0
19143000-19264000 rw-p 19143000 00:00 0                                  [heap]
31ff000000-31ff01c000 r-xp 00000000 08:01 459062                         /lib64/ld-2.5.so
31ff21b000-31ff21c000 r--p 0001b000 08:01 459062                         /lib64/ld-2.5.so
31ff21c000-31ff21d000 rw-p 0001c000 08:01 459062                         /lib64/ld-2.5.so
3200000000-320014d000 r-xp 00000000 08:01 459063                         /lib64/libc-2.5.so
320014d000-320034d000 ---p 0014d000 08:01 459063                         /lib64/libc-2.5.so
320034d000-3200351000 r--p 0014d000 08:01 459063                         /lib64/libc-2.5.so
3200351000-3200352000 rw-p 00151000 08:01 459063                         /lib64/libc-2.5.so
3200352000-3200357000 rw-p 3200352000 00:00 0
320d200000-320d20d000 r-xp 00000000 08:01 459086                         /lib64/libgcc_s-4.1.2-20080825.so.1
320d20d000-320d40d000 ---p 0000d000 08:01 459086                         /lib64/libgcc_s-4.1.2-20080825.so.1
320d40d000-320d40e000 rw-p 0000d000 08:01 459086                         /lib64/libgcc_s-4.1.2-20080825.so.1
2b0afb00a000-2b0afb00c000 rw-p 2b0afb00a000 00:00 0
2b0afb00c000-2b0afb036000 r-xp 00000000 08:01 10059785                   /usr/local/lib/libtorque.so.2.0.0
2b0afb036000-2b0afb235000 ---p 0002a000 08:01 10059785                   /usr/local/lib/libtorque.so.2.0.0
2b0afb235000-2b0afb237000 rw-p 00029000 08:01 10059785                   /usr/local/lib/libtorque.so.2.0.0
2b0afb237000-2b0afb31a000 rw-p 2b0afb237000 00:00 0
2b0afb332000-2b0afb333000 rw-p 2b0afb332000 00:00 0
2b0afb333000-2b0afb368000 r--s 00000000 08:03 1048582                    /var/db/nscd/hosts
2b0afb368000-2b0afb369000 rw-p 2b0afb368000 00:00 0
2b0afb380000-2b0afb38a000 r-xp 00000000 08:01 458777                     /lib64/libnss_files-2.5.so
2b0afb38a000-2b0afb589000 ---p 0000a000 08:01 458777                     /lib64/libnss_files-2.5.so
2b0afb589000-2b0afb58a000 r--p 00009000 08:01 458777                     /lib64/libnss_files-2.5.so
2b0afb58a000-2b0afb58b000 rw-p 0000a000 08:01 458777                     /lib64/libnss_files-2.5.so
2b0afb58b000-2b0afb6c3000 rw-p 2b0afb58b000 00:00 0
2b0afb6c3000-2b0afb6f8000 r--s 00000000 08:03 1048578                    /var/db/nscd/passwd
2b0afb6f8000-2b0afb72d000 r--s 00000000 08:03 1048581                    /var/db/nscd/group
7fff07b4a000-7fff07c14000 rw-p 7ffffff35000 00:00 0                      [stack]
ffffffffff600000-ffffffffffe00000 ---p 00000000 00:00 0                  [vdso]
Aborted (core dumped)
#


# gdb -c core.21144 /usr/local/sbin/pbs_server
GNU gdb Fedora (6.8-37.el5)
Copyright (C) 2008 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu"...
Reading symbols from /usr/local/lib/libtorque.so.2...done.
Loaded symbols for /usr/local/lib/libtorque.so.2
Reading symbols from /lib64/libc.so.6...done.
Loaded symbols for /lib64/libc.so.6
Reading symbols from /lib64/ld-linux-x86-64.so.2...done.
Loaded symbols for /lib64/ld-linux-x86-64.so.2
Reading symbols from /lib64/libnss_files.so.2...done.
Loaded symbols for /lib64/libnss_files.so.2
Reading symbols from /lib64/libgcc_s.so.1...done.
Loaded symbols for /lib64/libgcc_s.so.1
Core was generated by `pbs_server -D'.
Program terminated with signal 6, Aborted.
[New process 21144]
#0  0x0000003200030265 in raise () from /lib64/libc.so.6
(gdb) bt
#0  0x0000003200030265 in raise () from /lib64/libc.so.6
#1  0x0000003200031d10 in abort () from /lib64/libc.so.6
#2  0x000000320006a84b in __libc_message () from /lib64/libc.so.6
#3  0x00000032000722ef in _int_free () from /lib64/libc.so.6
#4  0x000000320007273b in free () from /lib64/libc.so.6
#5  0x000000000040bdda in job_purge (pjob=0x19245e60) at job_func.c:1644
#6  0x0000000000407d1a in array_delete (pa=0x191f9210) at array_func.c:469
#7  0x000000000040853a in setup_array_struct (pjob=0x19245e60) at array_func.c:647
#8  0x0000000000423302 in req_commit (preq=0x192386f0) at req_quejob.c:1565
#9  0x000000000041944d in dispatch_request (sfds=10, request=0x192386f0) at process_request.c:764
#10 0x0000000000419c66 in process_request (sfds=10) at process_request.c:695
#11 0x00002b0afb02bdee in wait_request (waittime=<value optimized out>, SState=0x71eb18) at ../Libnet/net_server.c:507
#12 0x0000000000417fcc in main_loop () at pbsd_main.c:1186
#13 0x0000000000418955 in main (argc=2, argv=<value optimized out>) at pbsd_main.c:1741
(gdb) quit
#

Stuart


More information about the torqueusers mailing list