From nobody@FreeBSD.org  Thu Apr  6 15:31:28 2006
Return-Path: <nobody@FreeBSD.org>
Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125])
	by hub.freebsd.org (Postfix) with ESMTP id 1DDCE16A400
	for <freebsd-gnats-submit@FreeBSD.org>; Thu,  6 Apr 2006 15:31:28 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (www.freebsd.org [216.136.204.117])
	by mx1.FreeBSD.org (Postfix) with ESMTP id 61A2543D9A
	for <freebsd-gnats-submit@FreeBSD.org>; Thu,  6 Apr 2006 15:31:08 +0000 (GMT)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (localhost [127.0.0.1])
	by www.freebsd.org (8.13.1/8.13.1) with ESMTP id k36FV8hH031555
	for <freebsd-gnats-submit@FreeBSD.org>; Thu, 6 Apr 2006 15:31:08 GMT
	(envelope-from nobody@www.freebsd.org)
Received: (from nobody@localhost)
	by www.freebsd.org (8.13.1/8.13.1/Submit) id k36FV8kZ031554;
	Thu, 6 Apr 2006 15:31:08 GMT
	(envelope-from nobody)
Message-Id: <200604061531.k36FV8kZ031554@www.freebsd.org>
Date: Thu, 6 Apr 2006 15:31:08 GMT
From: Pavel Merdin <fbug1@merdin.com>
To: freebsd-gnats-submit@FreeBSD.org
Subject: pthread segmentation fault
X-Send-Pr-Version: www-2.3

>Number:         95418
>Category:       amd64
>Synopsis:       pthread segmentation fault
>Confidential:   no
>Severity:       serious
>Priority:       low
>Responsible:    grog
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu Apr 06 15:40:15 GMT 2006
>Closed-Date:    Tue Jun 13 01:59:03 GMT 2006
>Last-Modified:  Tue Jun 13 01:59:03 GMT 2006
>Originator:     Pavel Merdin
>Release:        FreeBSD 6-stable
>Organization:
Fotki Inc.
>Environment:
6.1-PRERELEASE FreeBSD 6.1-PRERELEASE #6: Fri Feb 17 12:47:19 EST 2006     

>Description:
The pthread library forces mysql 4.1.18-log to crash with a message like:
Feb 19 14:26:00 kernel: pid 33539 (mysqld), uid 1001: exited on signal 11

It happens when mysql is heavily used.
This seems like a compiler bug, as the RAX register cannot normally be equal
to 216 (0xd8, see the register dump below) at 0x800d13aff.

gdb dump:

Program received signal SIGSEGV, Segmentation fault.
0x0000000800d13aff in _thr_gc (curthread=0x925000)
    at /usr/src/lib/libpthread/thread/thr_kern.c:1343
1343                    TAILQ_INSERT_HEAD(&worklist, kseg, kg_qe);
Current language:  auto; currently c
No locals.

(gdb) x/30i 0x0000000800d13a50
0x800d13a50 <_thr_gc+1296>:     out    %eax,$0xe8
0x800d13a52 <_thr_gc+1298>:     jle    0x800d13a1c <_thr_gc+1244>
0x800d13a54 <_thr_gc+1300>:     (bad)
0x800d13a55 <_thr_gc+1301>:     decl   0x64(%rbx,%rcx,4)
0x800d13a59 <_thr_gc+1305>:     and    $0x10,%al
0x800d13a5b <_thr_gc+1307>:     test   %r12,%r12
0x800d13a5e <_thr_gc+1310>:     jne    0x800d139f1 <_thr_gc+1201>
0x800d13a60 <_thr_gc+1312>:
    mov    1092225(%rip),%rsi        # 0x800e1e4e8 <_thread_list+12472>
0x800d13a67 <_thr_gc+1319>:     mov    (%rsi),%eax
0x800d13a69 <_thr_gc+1321>:     test   %eax,%eax
0x800d13a6b <_thr_gc+1323>:     jle    0x800d13c3a <_thr_gc+1786>
0x800d13a71 <_thr_gc+1329>:
    cmpl   $0x64,1092504(%rip)        # 0x800e1e610 <_thread_off_tcb+184>
0x800d13a78 <_thr_gc+1336>:     jle    0x800d13bf3 <_thr_gc+1715>
0x800d13a7e <_thr_gc+1342>:     callq  0x800d0fd80 <_kse_critical_enter>
0x800d13a83 <_thr_gc+1347>:     mov    0x180(%r13),%rdx
0x800d13a8a <_thr_gc+1354>:     mov    %rax,%rbp
0x800d13a8d <_thr_gc+1357>:     mov    0x130(%rdx),%eax
0x800d13a93 <_thr_gc+1363>:     cmp    $0x4,%eax
0x800d13a96 <_thr_gc+1366>:     jg     0x800d13c00 <_thr_gc+1728>
0x800d13a9c <_thr_gc+1372>:     inc    %eax
0x800d13a9e <_thr_gc+1374>:
    lea    1092571(%rip),%rdi        # 0x800e1e680 <_thread_off_tcb+296>
0x800d13aa5 <_thr_gc+1381>:     mov    %eax,0x130(%rdx)
0x800d13aab <_thr_gc+1387>:     mov    0x180(%r13),%rax
0x800d13ab2 <_thr_gc+1394>:     xor    %edx,%edx
0x800d13ab4 <_thr_gc+1396>:     movslq 0x130(%rax),%rsi
0x800d13abb <_thr_gc+1403>:     lea    (%rsi,%rsi,4),%rsi
0x800d13abf <_thr_gc+1407>:     lea    0x40(%rax,%rsi,8),%rsi
0x800d13ac4 <_thr_gc+1412>:     callq  0x800d19830 <_lock_acquire>
0x800d13ac9 <_thr_gc+1417>:
    mov    1092120(%rip),%rsi        # 0x800e1e4e8 <_thread_list+12472>
0x800d13ad0 <_thr_gc+1424>:     mov    %rsp,%rcx
(gdb) x/25i
0x800d13ad3 <_thr_gc+1427>:     jmp    0x800d13b0b <_thr_gc+1483>
0x800d13ad5 <_thr_gc+1429>:     mov    0x28(%rbx),%rax
0x800d13ad9 <_thr_gc+1433>:     mov    %rax,0x28(%rdx)
0x800d13add <_thr_gc+1437>:     mov    0x28(%rbx),%rax
0x800d13ae1 <_thr_gc+1441>:     mov    %rdx,(%rax)
0x800d13ae4 <_thr_gc+1444>:     mov    (%rsp),%rdx
0x800d13ae8 <_thr_gc+1448>:
    decl   1092386(%rip)        # 0x800e1e610 <_thread_off_tcb+184>
0x800d13aee <_thr_gc+1454>:     test   %rdx,%rdx
0x800d13af1 <_thr_gc+1457>:     mov    %rdx,0x20(%rbx)
0x800d13af5 <_thr_gc+1461>:     je     0x800d13b53 <_thr_gc+1555>
0x800d13af7 <_thr_gc+1463>:     mov    (%rsp),%rax
0x800d13afb <_thr_gc+1467>:     lea    0x20(%rbx),%rdx
0x800d13aff <_thr_gc+1471>:     mov    %rdx,0x28(%rax)
0x800d13b03 <_thr_gc+1475>:     mov    %rbx,(%rsp)
0x800d13b07 <_thr_gc+1479>:     mov    %rcx,0x28(%rbx)
0x800d13b0b <_thr_gc+1483>:     mov    (%rsi),%eax
0x800d13b0d <_thr_gc+1485>:     test   %eax,%eax
0x800d13b0f <_thr_gc+1487>:     jle    0x800d13b60 <_thr_gc+1568>
0x800d13b11 <_thr_gc+1489>:
    cmpl   $0x64,1092344(%rip)        # 0x800e1e610 <_thread_off_tcb+184>
0x800d13b18 <_thr_gc+1496>:     jle    0x800d13b69 <_thr_gc+1577>
0x800d13b1a <_thr_gc+1498>:
    mov    1092383(%rip),%rbx        # 0x800e1e640 <_thread_off_tcb+232>
0x800d13b21 <_thr_gc+1505>:     mov    0x20(%rbx),%rdx
0x800d13b25 <_thr_gc+1509>:     test   %rdx,%rdx
0x800d13b28 <_thr_gc+1512>:     jne    0x800d13ad5 <_thr_gc+1429>
0x800d13b2a <_thr_gc+1514>:     mov    0x28(%rbx),%rax

(gdb) info registers
rax            0xd8     216
rbx            0x709adf00       1889197824
rcx            0x7fffffffe700   140737488348928
rdx            0x709adf20       1889197856
rsi            0x800e1eb30      34374544176
rdi            0x800e1e680      34374542976
rbp            0x8008ac060      0x8008ac060
rsp            0x7fffffffe700   0x7fffffffe700
r8             0x0      0
r9             0x1      1
r10            0x7fffff12ffd8   140737472823256
r11            0x1000   4096
r12            0x0      0
r13            0x925000 9588736
r14            0x7fffffffe720   140737488348960
r15            0x23     35
rip            0x800d13aff      0x800d13aff <_thr_gc+1471>
eflags         0x10206  66054
cs             0x2b     43
ss             0x23     35
ds             0x0      0
es             0x0      0
fs             0x0      0
gs             0x0      0

>How-To-Repeat:
Try to load mysql with queries and many connections.
I used a real application under heavy load.
>Fix:
Recompiling pthread without -O2 fixes the issue.
>Release-Note:
>Audit-Trail:

Greg Lehey, 24 May 2006

This appears to be another manifestation of the bug reported as MySQL
BUG#19496 at http://bugs.mysql.com/bug.php?id=19496.  As you state,
libraries compiled without optimization don't fail.  On the other
hand, newer libraries compiled with optimization but with the patch
described in PR 95127 also do not fail.  I'm assuming therefore:

1.  This is a real code bug, not a compiler bug.
2.  Compiling without optimization drives it into hiding.

Could you please apply the following patch (to
/usr/src/lib/libpthread/thread/thr_kern.c) and let me know whether the
bug still occurs?  You'll need to reinstall the library
(/usr/src/lib/libpthread/libpthread.so.2) or install it elsewhere and
use LD_LIBRARY_PATH to point to it.

--- thr_kern.c  3 Jan 2006 15:34:27 -0000       1.120
+++ thr_kern.c  16 Feb 2006 01:33:36 -0000      1.121
@@ -1337,6 +1337,7 @@

        if (free_kseg_count <= MAX_CACHED_KSEGS)
                return;
+       TAILQ_INIT(&worklist);
        crit = _kse_critical_enter();
        KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
        while (free_kseg_count > MAX_CACHED_KSEGS) {

State-Changed-From-To: open->feedback 
State-Changed-By: grog 
State-Changed-When: Wed May 24 03:59:44 UTC 2006 
State-Changed-Why:  
Feedback solicited from submitter. 


Responsible-Changed-From-To: freebsd-amd64->grog 
Responsible-Changed-By: grog 
Responsible-Changed-When: Wed May 24 03:59:44 UTC 2006 
Responsible-Changed-Why:  
grog is looking at this PR and also MySQL BUG#19496, which 
appears to be the same thing. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=95418 

Adding to audit trail from misfiled PR amd64/97769:

Date: Wed, 24 May 2006 14:10:28 +0930
From: Greg 'groggy' Lehey <grog@FreeBSD.org>
 
 On Thursday,  6 April 2006 at 15:31:08 +0000, Pavel Merdin wrote:
 >
 >> Description:
 > The pthread library forces mysql 4.1.18-log to crash with a message like:
 > Feb 19 14:26:00 kernel: pid 33539 (mysqld), uid 1001: exited on signal 11
 >
 > It happens when mysql is heavily used.
 >
 > Program received signal SIGSEGV, Segmentation fault.
 > 0x0000000800d13aff in _thr_gc (curthread=0x925000)
 >     at /usr/src/lib/libpthread/thread/thr_kern.c:1343
 > 1343                    TAILQ_INSERT_HEAD(&worklist, kseg, kg_qe);
 > Current language:  auto; currently c
 > No locals.
 >
 > 0x800d13afb <_thr_gc+1467>:     lea    0x20(%rbx),%rdx
 > 0x800d13aff <_thr_gc+1471>:     mov    %rdx,0x28(%rax)
 
 This appears to be another manifestation of the bug reported as MySQL
 BUG#19496 at http://bugs.mysql.com/bug.php?id=19496.  As you state,
 libraries compiled without optimization don't fail.  On the other
 hand, newer libraries compiled with optimization but with the patch
 described in PR 95127 also do not fail.  I'm assuming therefore:
 
 1.  This is a real code bug, not a compiler bug.
 2.  Compiling without optimization drives it into hiding.
 
 Could you please apply the following patch (to
 /usr/src/lib/libpthread/thread/thr_kern.c) and let me know whether the
 bug still occurs?  You'll need to reinstall the library
 (/usr/src/lib/libpthread/libpthread.so.2) or install it elsewhere and
 use LD_LIBRARY_PATH to point to it.
 
 --- thr_kern.c  3 Jan 2006 15:34:27 -0000       1.120
 +++ thr_kern.c  16 Feb 2006 01:33:36 -0000      1.121
 @@ -1337,6 +1337,7 @@
 
         if (free_kseg_count <= MAX_CACHED_KSEGS)
                 return;
 +       TAILQ_INIT(&worklist);
         crit = _kse_critical_enter();
         KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
         while (free_kseg_count > MAX_CACHED_KSEGS) {
 
 Greg

Greg Lehey, 13 June 2006

I have had no reply from the submitter.  Since this appears to be a
duplicate of PR 95127, I'm closing the PR.

State-Changed-From-To: feedback->closed 
State-Changed-By: grog 
State-Changed-When: Tue Jun 13 01:55:49 UTC 2006 
State-Changed-Why:  
Duplicate of PR 95127. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=95418 
>Unformatted:
