[torqueusers] 2.5-beta another one

Glen Beane glen.beane at gmail.com
Wed Jul 28 20:04:11 MDT 2010


On Fri, Jul 23, 2010 at 10:53 AM, Stuart Barkley <stuartb at 4gh.net> wrote:
> Sorry.  Another problem with 2.5 beta and arrays.
>
> User types (incorrectly):
>
> % qsub -t nodes=128:ppn=8 run_parallel.sh
> qsub: End of File
> %
>
> pbs_server dies with:
>
> # pbs_server -D
> pbs_server is up
> *** glibc detected *** pbs_server: double free or corruption (!prev): 0x0000000019245e60 ***
> ======= Backtrace: =========
> /lib64/libc.so.6[0x32000722ef]
> /lib64/libc.so.6(cfree+0x4b)[0x320007273b]
> pbs_server[0x40bdda]
> pbs_server[0x407d1a]
> pbs_server[0x40853a]
> pbs_server[0x423302]
> pbs_server[0x41944d]
> pbs_server[0x419c66]
> /usr/local/lib/libtorque.so.2(wait_request+0x2ce)[0x2b0afb02bdee]
> pbs_server[0x417fcc]
> pbs_server[0x418955]
> /lib64/libc.so.6(__libc_start_main+0xf4)[0x320001d994]
> pbs_server[0x4065f9]
> ======= Memory map: ========
> 00400000-0044a000 r-xp 00000000 08:01 14793221                           /usr/local/sbin/pbs_server
> 0064a000-00650000 rw-p 0004a000 08:01 14793221                           /usr/local/sbin/pbs_server
> 00650000-01198000 rw-p 00650000 00:00 0
> 19143000-19264000 rw-p 19143000 00:00 0                                  [heap]
> 31ff000000-31ff01c000 r-xp 00000000 08:01 459062                         /lib64/ld-2.5.so
> 31ff21b000-31ff21c000 r--p 0001b000 08:01 459062                         /lib64/ld-2.5.so
> 31ff21c000-31ff21d000 rw-p 0001c000 08:01 459062                         /lib64/ld-2.5.so
> 3200000000-320014d000 r-xp 00000000 08:01 459063                         /lib64/libc-2.5.so
> 320014d000-320034d000 ---p 0014d000 08:01 459063                         /lib64/libc-2.5.so
> 320034d000-3200351000 r--p 0014d000 08:01 459063                         /lib64/libc-2.5.so
> 3200351000-3200352000 rw-p 00151000 08:01 459063                         /lib64/libc-2.5.so
> 3200352000-3200357000 rw-p 3200352000 00:00 0
> 320d200000-320d20d000 r-xp 00000000 08:01 459086                         /lib64/libgcc_s-4.1.2-20080825.so.1
> 320d20d000-320d40d000 ---p 0000d000 08:01 459086                         /lib64/libgcc_s-4.1.2-20080825.so.1
> 320d40d000-320d40e000 rw-p 0000d000 08:01 459086                         /lib64/libgcc_s-4.1.2-20080825.so.1
> 2b0afb00a000-2b0afb00c000 rw-p 2b0afb00a000 00:00 0
> 2b0afb00c000-2b0afb036000 r-xp 00000000 08:01 10059785                   /usr/local/lib/libtorque.so.2.0.0
> 2b0afb036000-2b0afb235000 ---p 0002a000 08:01 10059785                   /usr/local/lib/libtorque.so.2.0.0
> 2b0afb235000-2b0afb237000 rw-p 00029000 08:01 10059785                   /usr/local/lib/libtorque.so.2.0.0
> 2b0afb237000-2b0afb31a000 rw-p 2b0afb237000 00:00 0
> 2b0afb332000-2b0afb333000 rw-p 2b0afb332000 00:00 0
> 2b0afb333000-2b0afb368000 r--s 00000000 08:03 1048582                    /var/db/nscd/hosts
> 2b0afb368000-2b0afb369000 rw-p 2b0afb368000 00:00 0
> 2b0afb380000-2b0afb38a000 r-xp 00000000 08:01 458777                     /lib64/libnss_files-2.5.so
> 2b0afb38a000-2b0afb589000 ---p 0000a000 08:01 458777                     /lib64/libnss_files-2.5.so
> 2b0afb589000-2b0afb58a000 r--p 00009000 08:01 458777                     /lib64/libnss_files-2.5.so
> 2b0afb58a000-2b0afb58b000 rw-p 0000a000 08:01 458777                     /lib64/libnss_files-2.5.so
> 2b0afb58b000-2b0afb6c3000 rw-p 2b0afb58b000 00:00 0
> 2b0afb6c3000-2b0afb6f8000 r--s 00000000 08:03 1048578                    /var/db/nscd/passwd
> 2b0afb6f8000-2b0afb72d000 r--s 00000000 08:03 1048581                    /var/db/nscd/group
> 7fff07b4a000-7fff07c14000 rw-p 7ffffff35000 00:00 0                      [stack]
> ffffffffff600000-ffffffffffe00000 ---p 00000000 00:00 0                  [vdso]
> Aborted (core dumped)
> #
>
>
> # gdb -c core.21144 /usr/local/sbin/pbs_server
> GNU gdb Fedora (6.8-37.el5)
> Copyright (C) 2008 Free Software Foundation, Inc.
> License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
> This is free software: you are free to change and redistribute it.
> There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
> and "show warranty" for details.
> This GDB was configured as "x86_64-redhat-linux-gnu"...
> Reading symbols from /usr/local/lib/libtorque.so.2...done.
> Loaded symbols for /usr/local/lib/libtorque.so.2
> Reading symbols from /lib64/libc.so.6...done.
> Loaded symbols for /lib64/libc.so.6
> Reading symbols from /lib64/ld-linux-x86-64.so.2...done.
> Loaded symbols for /lib64/ld-linux-x86-64.so.2
> Reading symbols from /lib64/libnss_files.so.2...done.
> Loaded symbols for /lib64/libnss_files.so.2
> Reading symbols from /lib64/libgcc_s.so.1...done.
> Loaded symbols for /lib64/libgcc_s.so.1
> Core was generated by `pbs_server -D'.
> Program terminated with signal 6, Aborted.
> [New process 21144]
> #0  0x0000003200030265 in raise () from /lib64/libc.so.6
> (gdb) bt
> #0  0x0000003200030265 in raise () from /lib64/libc.so.6
> #1  0x0000003200031d10 in abort () from /lib64/libc.so.6
> #2  0x000000320006a84b in __libc_message () from /lib64/libc.so.6
> #3  0x00000032000722ef in _int_free () from /lib64/libc.so.6
> #4  0x000000320007273b in free () from /lib64/libc.so.6
> #5  0x000000000040bdda in job_purge (pjob=0x19245e60) at job_func.c:1644
> #6  0x0000000000407d1a in array_delete (pa=0x191f9210) at array_func.c:469
> #7  0x000000000040853a in setup_array_struct (pjob=0x19245e60) at array_func.c:647
> #8  0x0000000000423302 in req_commit (preq=0x192386f0) at req_quejob.c:1565
> #9  0x000000000041944d in dispatch_request (sfds=10, request=0x192386f0) at process_request.c:764
> #10 0x0000000000419c66 in process_request (sfds=10) at process_request.c:695
> #11 0x00002b0afb02bdee in wait_request (waittime=<value optimized out>, SState=0x71eb18) at ../Libnet/net_server.c:507
> #12 0x0000000000417fcc in main_loop () at pbsd_main.c:1186
> #13 0x0000000000418955 in main (argc=2, argv=<value optimized out>) at pbsd_main.c:1741
> (gdb) quit
> #
>
> Stuart
> _______________________________________________
> torqueusers mailing list
> torqueusers at supercluster.org
> http://www.supercluster.org/mailman/listinfo/torqueusers
>



This has been fixed in 2.5-fixes, and will be merged into trunk shortly.


More information about the torqueusers mailing list