[torqueusers] maui-3.2.6p18 + torque-2.1.6: segfault on Maui

Dave Jackson jacksond at clusterresources.com
Wed Jan 17 18:17:02 MST 2007


Try

  > export MAUIDEBUG=true
  > gdb maui
  (gdb) r
  <SEGV occurs>
  (gdb) where

Thanks,
Dave

On Thu, 2007-01-18 at 08:02 +0700, notinh notien wrote:
> Hi.  I am a bit rusty with gdb; when I ran gdb on maui, nothing was shown 
> after run and bt.  If you could tell me how to carry out what you wanted, 
> I will try it out.
> 
> Thank you.
> 
> Here is the strace:
> strace ./maui -C /usr/local/maui/maui.cfg
> execve("./maui", ["./maui", "-C", "/usr/local/maui/maui.cfg"], [/* 35 vars 
> */]) = 0
> uname({sys="Linux", node="neptune.myhost.com", ...}) = 0
> brk(0)                                  = 0x21ee000
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2bf000
> access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or 
> directory)
> open("/usr/local/torque-2.1.6/lib/tls/x86_64/libm.so.6", O_RDONLY) = -1 
> ENOENT (No such file or directory)
> stat("/usr/local/torque-2.1.6/lib/tls/x86_64", 0x7fff8b7e93c0) = -1 ENOENT 
> (No such file or directory)
> open("/usr/local/torque-2.1.6/lib/tls/libm.so.6", O_RDONLY) = -1 ENOENT (No 
> such file or directory)
> stat("/usr/local/torque-2.1.6/lib/tls", 0x7fff8b7e93c0) = -1 ENOENT (No such 
> file or directory)
> open("/usr/local/torque-2.1.6/lib/x86_64/libm.so.6", O_RDONLY) = -1 ENOENT 
> (No such file or directory)
> stat("/usr/local/torque-2.1.6/lib/x86_64", 0x7fff8b7e93c0) = -1 ENOENT (No 
> such file or directory)
> open("/usr/local/torque-2.1.6/lib/libm.so.6", O_RDONLY) = -1 ENOENT (No such 
> file or directory)
> stat("/usr/local/torque-2.1.6/lib", {st_mode=S_IFDIR|0755, st_size=4096, 
> ...}) = 0
> open("/etc/ld.so.cache", O_RDONLY)      = 3
> fstat(3, {st_mode=S_IFREG|0644, st_size=126762, ...}) = 0
> mmap(NULL, 126762, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b3b1f2c0000
> close(3)                                = 0
> open("/lib64/tls/libm.so.6", O_RDONLY)  = 3
> read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\360=\20"..., 832) = 
> 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=613297, ...}) = 0
> mmap(0x3410100000, 1593800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 
> 3, 0) = 0x3410100000
> mprotect(0x3410185000, 1049032, PROT_NONE) = 0
> mmap(0x3410284000, 8192, PROT_READ|PROT_WRITE, 
> MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x84000) = 0x3410284000
> close(3)                                = 0
> open("/usr/local/torque-2.1.6/lib/libtorque.so.0", O_RDONLY) = 3
> read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\253\0"..., 832) = 
> 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=663286, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2df000
> mmap(NULL, 1325824, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
> 0x2b3b1f2e0000
> mprotect(0x2b3b1f306000, 1170176, PROT_NONE) = 0
> mmap(0x2b3b1f406000, 12288, PROT_READ|PROT_WRITE, 
> MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x26000) = 0x2b3b1f406000
> mmap(0x2b3b1f409000, 109312, PROT_READ|PROT_WRITE, 
> MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b3b1f409000
> close(3)                                = 0
> open("/usr/local/torque-2.1.6/lib/libc.so.6", O_RDONLY) = -1 ENOENT (No such 
> file or directory)
> open("/lib64/tls/libc.so.6", O_RDONLY)  = 3
> read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240\304"..., 832) = 
> 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=1493409, ...}) = 0
> mmap(0x340fe00000, 2310088, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 
> 3, 0) = 0x340fe00000
> mprotect(0x340ff2b000, 1085384, PROT_NONE) = 0
> mmap(0x341002b000, 20480, PROT_READ|PROT_WRITE, 
> MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12b000) = 0x341002b000
> mmap(0x3410030000, 16328, PROT_READ|PROT_WRITE, 
> MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3410030000
> close(3)                                = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f424000
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f425000
> mprotect(0x341002b000, 8192, PROT_READ) = 0
> arch_prctl(ARCH_SET_FS, 0x2b3b1f424b00) = 0
> munmap(0x2b3b1f2c0000, 126762)          = 0
> umask(027)                              = 022
> umask(027)                              = 027
> brk(0)                                  = 0x21ee000
> brk(0x220f000)                          = 0x220f000
> open("/usr/local/maui/log/maui.log", O_RDWR|O_APPEND|O_CREAT, 0666) = 3
> rt_sigaction(SIGUSR1, {0x4217a0, [USR1], SA_RESTORER|SA_RESTART, 
> 0x340fe2e2b0}, {SIG_DFL}, 8) = 0
> rt_sigaction(SIGUSR2, {0x4217a0, [USR2], SA_RESTORER|SA_RESTART, 
> 0x340fe2e2b0}, {SIG_DFL}, 8) = 0
> getpid()                                = 19191
> chdir("/usr/local/maui/")               = 0
> stat("/etc/maui.cfg", 0x7fff8b7dfd00)   = -1 ENOENT (No such file or 
> directory)
> stat("/usr/local/maui/maui.ck", {st_mode=S_IFREG|0640, st_size=39209, ...}) 
> = 0
> open("/usr/local/maui/maui.ck", O_RDONLY) = 4
> fstat(4, {st_mode=S_IFREG|0640, st_size=39209, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c0000
> read(4, "Sched                    sched 1"..., 36864) = 36864
> read(4, "\" TPSR=\"0.000\" TPSU=\"0.000\" TQM="..., 4096) = 2345
> close(4)                                = 0
> munmap(0x2b3b1f2c0000, 4096)            = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 4
> fcntl(4, F_GETFL)                       = 0x2 (flags O_RDWR)
> fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
> connect(4, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 
> ENOENT (No such file or directory)
> close(4)                                = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 4
> fcntl(4, F_GETFL)                       = 0x2 (flags O_RDWR)
> fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
> connect(4, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 
> ENOENT (No such file or directory)
> close(4)                                = 0
> open("/etc/nsswitch.conf", O_RDONLY)    = 4
> fstat(4, {st_mode=S_IFREG|0644, st_size=1623, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c0000
> read(4, "#\n# /etc/nsswitch.conf\n#\n# An ex"..., 4096) = 1623
> read(4, "", 4096)                       = 0
> close(4)                                = 0
> munmap(0x2b3b1f2c0000, 4096)            = 0
> open("/usr/local/torque-2.1.6/lib/libnss_files.so.2", O_RDONLY) = -1 ENOENT 
> (No such file or directory)
> open("/etc/ld.so.cache", O_RDONLY)      = 4
> fstat(4, {st_mode=S_IFREG|0644, st_size=126762, ...}) = 0
> mmap(NULL, 126762, PROT_READ, MAP_PRIVATE, 4, 0) = 0x2b3b1f2c0000
> close(4)                                = 0
> open("/lib64/libnss_files.so.2", O_RDONLY) = 4
> read(4, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\"\0"..., 832) = 
> 832
> fstat(4, {st_mode=S_IFREG|0755, st_size=56902, ...}) = 0
> mmap(NULL, 1094952, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 4, 0) = 
> 0x2b3b1f426000
> mprotect(0x2b3b1f430000, 1053992, PROT_NONE) = 0
> mmap(0x2b3b1f530000, 8192, PROT_READ|PROT_WRITE, 
> MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 4, 0xa000) = 0x2b3b1f530000
> close(4)                                = 0
> munmap(0x2b3b1f2c0000, 126762)          = 0
> open("/etc/passwd", O_RDONLY)           = 4
> fcntl(4, F_GETFD)                       = 0
> fcntl(4, F_SETFD, FD_CLOEXEC)           = 0
> fstat(4, {st_mode=S_IFREG|0644, st_size=1627, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c0000
> read(4, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 1627
> read(4, "", 4096)                       = 0
> close(4)                                = 0
> munmap(0x2b3b1f2c0000, 4096)            = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 4
> fcntl(4, F_GETFL)                       = 0x2 (flags O_RDWR)
> fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
> connect(4, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 
> ENOENT (No such file or directory)
> close(4)                                = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 4
> fcntl(4, F_GETFL)                       = 0x2 (flags O_RDWR)
> fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
> connect(4, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 
> ENOENT (No such file or directory)
> close(4)                                = 0
> open("/etc/group", O_RDONLY)            = 4
> fcntl(4, F_GETFD)                       = 0
> fcntl(4, F_SETFD, FD_CLOEXEC)           = 0
> fstat(4, {st_mode=S_IFREG|0644, st_size=623, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c0000
> read(4, "root:x:0:root\nbin:x:1:root,bin,d"..., 4096) = 623
> read(4, "", 4096)                       = 0
> close(4)                                = 0
> munmap(0x2b3b1f2c0000, 4096)            = 0
> open("/etc/group", O_RDONLY)            = 4
> fcntl(4, F_GETFD)                       = 0
> fcntl(4, F_SETFD, FD_CLOEXEC)           = 0
> fstat(4, {st_mode=S_IFREG|0644, st_size=623, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c0000
> read(4, "root:x:0:root\nbin:x:1:root,bin,d"..., 4096) = 623
> read(4, "", 4096)                       = 0
> close(4)                                = 0
> munmap(0x2b3b1f2c0000, 4096)            = 0
> rt_sigaction(SIGINT, {SIG_IGN}, {SIG_DFL}, 8) = 0
> rt_sigaction(SIGTERM, {0x4a47e0, [TERM], SA_RESTORER|SA_RESTART, 
> 0x340fe2e2b0}, {SIG_DFL}, 8) = 0
> rt_sigaction(SIGQUIT, {0x4a47e0, [QUIT], SA_RESTORER|SA_RESTART, 
> 0x340fe2e2b0}, {SIG_DFL}, 8) = 0
> rt_sigaction(SIGIO, {0x4a47e0, [IO], SA_RESTORER|SA_RESTART, 0x340fe2e2b0}, 
> {SIG_DFL}, 8) = 0
> rt_sigaction(SIGURG, {0x4a47e0, [URG], SA_RESTORER|SA_RESTART, 
> 0x340fe2e2b0}, {SIG_DFL}, 8) = 0
> rt_sigaction(SIGHUP, {SIG_IGN}, {SIG_DFL}, 8) = 0
> rt_sigaction(SIGPIPE, {SIG_IGN}, {SIG_DFL}, 8) = 0
> stat("/usr/local/maui/maui.cfg", {st_mode=S_IFREG|0644, st_size=2078, ...}) 
> = 0
> open("/usr/local/maui/maui.cfg", O_RDONLY) = 4
> fstat(4, {st_mode=S_IFREG|0644, st_size=2078, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c0000
> read(4, "# maui.cfg 3.2.6p18\n\nSERVERHOST "..., 4096) = 2078
> close(4)                                = 0
> munmap(0x2b3b1f2c0000, 4096)            = 0
> stat("/usr/local/maui/maui-private.cfg", {st_mode=S_IFREG|0644, st_size=0, 
> ...}) = 0
> open("/usr/local/maui/maui-private.cfg", O_RDONLY) = 4
> close(4)                                = 0
> open("/etc/localtime", O_RDONLY)        = 4
> fstat(4, {st_mode=S_IFREG|0644, st_size=1017, ...}) = 0
> fstat(4, {st_mode=S_IFREG|0644, st_size=1017, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c0000
> read(4, "TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0"..., 4096) = 
> 1017
> close(4)                                = 0
> munmap(0x2b3b1f2c0000, 4096)            = 0
> fstat(3, {st_mode=S_IFREG|0640, st_size=6189724, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c0000
> write(3, "01/17 16:58:34 INFO:     startin"..., 75) = 75
> write(3, "01/17 16:58:34 INFO:     new LOG"..., 48) = 48
> write(3, "01/17 16:58:34 MCfgProcessLine(N"..., 66) = 66
> write(3, "01/17 16:58:34 MCfgSetVal(NODEAL"..., 72) = 72
> write(3, "01/17 16:58:34 MUGetIndex(MINRES"..., 49) = 49
> write(3, "01/17 16:58:34 MCfgProcessLine(Q"..., 52) = 52
> write(3, "01/17 16:58:34 MCfgSetVal(QUEUET"..., 67) = 67
> write(3, "01/17 16:58:34 MCfgProcessLine(R"..., 66) = 66
> write(3, "01/17 16:58:34 MCfgSetVal(RESERV"..., 69) = 69
> write(3, "01/17 16:58:34 MUGetIndex(CURREN"..., 52) = 52
> write(3, "01/17 16:58:34 MCfgProcessLine(R"..., 57) = 57
> write(3, "01/17 16:58:34 MCfgSetVal(RMPOLL"..., 66) = 66
> write(3, "01/17 16:58:34 MUTimeFromString("..., 42) = 42
> write(3, "01/17 16:58:34 MCfgProcessLine(S"..., 68) = 68
> write(3, "01/17 16:58:34 MCfgSetVal(SERVER"..., 62) = 62
> uname({sys="Linux", node="neptune.nanostellar.com", ...}) = 0
> open("/etc/resolv.conf", O_RDONLY)      = 4
> fstat(4, {st_mode=S_IFREG|0644, st_size=84, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c1000
> read(4, "; generated by /sbin/dhclient-sc"..., 4096) = 84
> read(4, "", 4096)                       = 0
> close(4)                                = 0
> munmap(0x2b3b1f2c1000, 4096)            = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 4
> fcntl(4, F_GETFL)                       = 0x2 (flags O_RDWR)
> fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
> connect(4, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 
> ENOENT (No such file or directory)
> close(4)                                = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 4
> fcntl(4, F_GETFL)                       = 0x2 (flags O_RDWR)
> fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
> connect(4, {sa_family=AF_FILE, path="/var/run/nscd/socket"}, 110) = -1 
> ENOENT (No such file or directory)
> close(4)                                = 0
> open("/etc/host.conf", O_RDONLY)        = 4
> fstat(4, {st_mode=S_IFREG|0644, st_size=17, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c1000
> read(4, "order hosts,bind\n", 4096)     = 17
> read(4, "", 4096)                       = 0
> close(4)                                = 0
> munmap(0x2b3b1f2c1000, 4096)            = 0
> open("/etc/hosts", O_RDONLY)            = 4
> fcntl(4, F_GETFD)                       = 0
> fcntl(4, F_SETFD, FD_CLOEXEC)           = 0
> fstat(4, {st_mode=S_IFREG|0644, st_size=564, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
> 0x2b3b1f2c1000
> read(4, "# that require network functiona"..., 4096) = 564
> close(4)                                = 0
> munmap(0x2b3b1f2c1000, 4096)            = 0
> write(3, "01/17 16:58:34 INFO:     startin"..., 73) = 73
> write(3, "01/17 16:58:34 MCfgProcessLine(S"..., 51) = 51
> write(3, "01/17 16:58:34 MCfgSetVal(SERVER"..., 62) = 62
> write(3, "01/17 16:58:34 MUGetIndex(NORMAL"..., 44) = 44
> write(3, "01/17 16:58:34 MCfgProcessLine(S"..., 50) = 50
> write(3, "01/17 16:58:34 MCfgSetVal(SERVER"..., 62) = 62
> write(3, "01/17 16:58:34 MUGetIndex(TYPE,V"..., 42) = 42
> write(3, "01/17 16:58:34 MUGetIndex(PBS,Va"..., 41) = 41
> write(3, "01/17 16:58:34 MAMSetDefaults(ba"..., 36) = 36
> write(3, "01/17 16:58:34 MAMSetDefaults(ba"..., 36) = 36
> write(3, "01/17 16:58:34 MUGetIndex(TYPE,V"..., 42) = 42
> write(3, "01/17 16:58:34 MUGetIndex(NONE,V"..., 42) = 42
> write(3, "01/17 16:58:34 MAMSetDefaults(ba"..., 36) = 36
> geteuid()                               = 0
> stat("/usr/local/maui//.moab.key", 0x7fff8b7df490) = -1 ENOENT (No such file 
> or directory)
> write(3, "01/17 16:58:34 ServerProcessArgs"..., 43) = 43
> write(3, "01/17 16:58:34 MUGetOpt(3,ArgV,a"..., 85) = 85
> write(3, "01/17 16:58:34 ServerDemonize()\n", 32) = 32
> setpgid(0, 0)                           = 0
> clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, 
> child_tidptr=0x2b3b1f424b90) = 19192
> write(3, "01/17 16:58:34 INFO:     parent "..., 43) = 43
> exit_group(0)                           = ?
> Process 19191 detached
> 
> 
> 
> >From: Dave Jackson <jacksond at clusterresources.com>
> >To: notinh notien <notinhnotien7 at hotmail.com>
> >CC: torqueusers at supercluster.org, mauiusers at supercluster.org
> >Subject: Re: [torqueusers] maui-3.2.6p18 + torque-2.1.6: segfault on Maui
> >Date: Wed, 17 Jan 2007 17:49:14 -0700
> >
> >Can you send us a gdb 'where' covering the SEGV?  We will get this
> >fixed!
> >
> >Dave
> >
> >On Thu, 2007-01-18 at 07:47 +0700, notinh notien wrote:
> > > Hi, I just set up some new nodes and my head node runs CentOS 4.4
> > > x86_64 on dual core dual CPUs 2.0 GHz.  I could not get maui to run
> > > and it kept giving me a segfault.  I had to install 3.2.6p17 instead
> > > and this version has worked without any problem.
> > >
> > > FYI.
> > >
> > > _________________________________________________________________
> > > Express yourself instantly with MSN Messenger! Download today it's FREE!
> > > http://messenger.msn.click-url.com/go/onm00200471ave/direct/01/
> > >
> > > _______________________________________________
> > > torqueusers mailing list
> > > torqueusers at supercluster.org
> > > http://www.supercluster.org/mailman/listinfo/torqueusers
> >
> 
> _________________________________________________________________
> Express yourself instantly with MSN Messenger! Download today it's FREE! 
> http://messenger.msn.click-url.com/go/onm00200471ave/direct/01/
> 



More information about the torqueusers mailing list