From kwm@FreeBSD.org  Thu Dec 15 11:25:35 2011
Return-Path: <kwm@FreeBSD.org>
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 52813106566B
	for <FreeBSD-gnats-submit@freebsd.org>; Thu, 15 Dec 2011 11:25:35 +0000 (UTC)
	(envelope-from kwm@FreeBSD.org)
Received: from freefall.freebsd.org (freefall.freebsd.org [IPv6:2001:4f8:fff6::28])
	by mx1.freebsd.org (Postfix) with ESMTP id 40D688FC1A
	for <FreeBSD-gnats-submit@freebsd.org>; Thu, 15 Dec 2011 11:25:35 +0000 (UTC)
Received: from freefall.freebsd.org (localhost [127.0.0.1])
	by freefall.freebsd.org (8.14.5/8.14.5) with ESMTP id pBFBPZtF061189
	for <FreeBSD-gnats-submit@freebsd.org>; Thu, 15 Dec 2011 11:25:35 GMT
	(envelope-from kwm@freefall.freebsd.org)
Received: (from kwm@localhost)
	by freefall.freebsd.org (8.14.5/8.14.5/Submit) id pBFBPZaq061188;
	Thu, 15 Dec 2011 11:25:35 GMT
	(envelope-from kwm)
Message-Id: <201112151125.pBFBPZaq061188@freefall.freebsd.org>
Date: Thu, 15 Dec 2011 11:25:35 GMT
From: Koop Mast <kwm@FreeBSD.org>
Reply-To: Koop Mast <kwm@FreeBSD.org>
To: FreeBSD-gnats-submit@freebsd.org
Cc:
Subject: [panic] [ath driver] kernel panic: page fault with ath0 taskq
X-Send-Pr-Version: 3.113
X-GNATS-Notify:

>Number:         163312
>Category:       kern
>Synopsis:       [panic] [ath] kernel panic: page fault with ath0 taskq
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-wireless
>State:          patched
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu Dec 15 11:30:10 UTC 2011
>Closed-Date:    
>Last-Modified:  Sun Apr 20 01:47:39 UTC 2014
>Originator:     Koop Mast
>Release:        FreeBSD 8.2-STABLE i386
>Organization:
>Environment:
System: FreeBSD freefall.freebsd.org 8.2-STABLE FreeBSD 8.2-STABLE #5 r227907: Wed Nov 23 21:55:50 UTC 2011 simon@freefall.freebsd.org:/usr/obj/usr/src/sys/FREEFALL i386


	
>Description:
	This panic happened while the machine was "idle" running Xorg, gnome3,
	virtualbox and firefox.

ath0@pci0:3:0:0:        class=0x028000 card=0x10891a3b chip=0x002b168c rev=0x01
hdr=0x00
    vendor     = 'Atheros Communications Inc.'
    device     = 'AR9285 Wireless Network Adapter (PCI-Express)'
    class      = network

FreeBSD crashalot.rainbow-runner.nl 10.0-CURRENT FreeBSD 10.0-CURRENT
#32 r228191M: Fri Dec  2 01:20:19 CET 2011
root@crashalot.rainbow-runner.nl:/usr/obj/usr/src/sys/Sparkel  amd64

Fatal trap 12: page fault while in kernel mode
cpuid = 4; apic id = 04
fault virtual address   = 0xffffff8002a33a44
fault code              = supervisor write data, page not present
instruction pointer     = 0x20:0xffffffff802ea7b4
stack pointer           = 0x28:0xffffff81134c9aa0
frame pointer           = 0x28:0xffffff81134c9b40
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 0 (ath0 taskq)
trap number             = 12
panic: page fault
cpuid = 4
KDB: stack backtrace:
db_trace_self_wrapper() at 0xffffffff802c04ba = db_trace_self_wrapper+0x2a
kdb_backtrace() at 0xffffffff804b3137 = kdb_backtrace+0x37
panic() at 0xffffffff8047b0c7 = panic+0x187
trap_fatal() at 0xffffffff8070e4e0 = trap_fatal+0x290
trap_pfault() at 0xffffffff8070e80b = trap_pfault+0x1db
trap() at 0xffffffff8070ec1f = trap+0x39f
calltrap() at 0xffffffff806f866f = calltrap+0x8
--- trap 0xc, rip = 0xffffffff802ea7b4, rsp = 0xffffff81134c9aa0, rbp =
0xffffff
ath_rx_proc() at 0xffffffff802ea7b4 = ath_rx_proc+0x564
taskqueue_run_locked() at 0xffffffff804c11f3 = taskqueue_run_locked+0x93
taskqueue_thread_loop() at 0xffffffff804c1c9e = taskqueue_thread_loop+0x3e
fork_exit() at 0xffffffff8044b949 = fork_exit+0x189
fork_trampoline() at 0xffffffff806f8b9e = fork_trampoline+0xe
--- trap 0, rip = 0, rsp = 0xffffff81134c9d00, rbp = 0 ---
Uptime: 11h46m29s
Dumping 818 out of 3981
MB:..2%..12%..22%..32%..42%..51%..61%..71%..81%..92%

<snip gdb loading symbols>

#0  doadump (textdump=1) at /usr/src/sys/kern/kern_shutdown.c:261
261             if (textdump && textdump_pending) {
(kgdb) #0  doadump (textdump=1) at /usr/src/sys/kern/kern_shutdown.c:261
#1  0xffffffff8047b7d9 in kern_reboot (howto=260)
    at /usr/src/sys/kern/kern_shutdown.c:443
#2  0xffffffff8047b0b1 in panic (fmt=Variable "fmt" is not available.
)
    at /usr/src/sys/kern/kern_shutdown.c:608
#3  0xffffffff8070e4e0 in trap_fatal (frame=0xc, eva=Variable "eva" is
not avail
)
    at /usr/src/sys/amd64/amd64/trap.c:819
#4  0xffffffff8070e80b in trap_pfault (frame=0xffffff81134c99f0,
usermode=0)
    at /usr/src/sys/amd64/amd64/trap.c:735
#5  0xffffffff8070ec1f in trap (frame=0xffffff81134c99f0)
    at /usr/src/sys/amd64/amd64/trap.c:474
#6  0xffffffff806f866f in calltrap ()
    at /usr/src/sys/amd64/amd64/exception.S:228
#7  0xffffffff802ea7b4 in ath_rx_proc (sc=0xffffff8000a2e000, resched=1)
    at /usr/src/sys/dev/ath/if_ath.c:4114
#8  0xffffffff804c11f3 in taskqueue_run_locked
(queue=0xfffffe0002b91000)
    at /usr/src/sys/kern/subr_taskqueue.c:308
#9  0xffffffff804c1c9e in taskqueue_thread_loop (arg=Variable "arg" is
not avail
)
    at /usr/src/sys/kern/subr_taskqueue.c:497
#10 0xffffffff8044b949 in fork_exit (
    callout=0xffffffff804c1c60 <taskqueue_thread_loop>,
    arg=0xffffff8000a2e4b0, frame=0xffffff81134c9c50)
    at /usr/src/sys/kern/kern_fork.c:995
#11 0xffffffff806f8b9e in fork_trampoline ()
    at /usr/src/sys/amd64/amd64/exception.S:602
#12 0x0000000000000000 in ?? ()
#13 0x0000000000000000 in ?? ()
#14 0x0000000000000000 in ?? ()
#15 0x0000000000000000 in ?? ()
#16 0x0000000000000000 in ?? ()
#17 0x0000000000000000 in ?? ()
#18 0x0000000000000000 in ?? ()
#19 0x0000000000000000 in ?? ()
#20 0x0000000000000000 in ?? ()
#21 0x0000000000000000 in ?? ()
#22 0x0000000000000000 in ?? ()
#23 0x0000000000000000 in ?? ()
#24 0x0000000000000000 in ?? ()
#25 0x0000000000000000 in ?? ()
#26 0x0000000000000000 in ?? ()
#27 0x0000000000000000 in ?? ()
#28 0x0000000000000000 in ?? ()
#29 0x0000000000000000 in ?? ()
#30 0x0000000000000000 in ?? ()
#31 0x0000000000000000 in ?? ()
#32 0x0000000000000000 in ?? ()
#33 0x0000000000000000 in ?? ()
#34 0x0000000000000000 in ?? ()
#35 0x0000000000000000 in ?? ()
#36 0xfffffe0002b86428 in ?? ()
#37 0x0000000000000000 in ?? ()
#38 0xffffffff815e9780 in affinity ()
#39 0xfffffe000291d480 in ?? ()
#40 0xffffff81134c94c0 in ?? ()
#41 0xffffff81134c9468 in ?? ()
#42 0xfffffe0002b86000 in ?? ()
#43 0xffffffff804a5a57 in sched_switch (td=0xffffff8000a2e4b0,
    newtd=0xffffffff804c1c60, flags=Variable "flags" is not available.
) at /usr/src/sys/kern/sched_ule.c:1853
Previous frame inner to this frame (corrupt stack?)
(kgdb)
	
>How-To-Repeat:
	There isn't really a way to reproduce it, since this panic happened on
	Dec 7 and the previous one was on Nov 22.
	
>Fix:
	Currently no fix, but I am running with the attached patch sent to me by
	Adrian Chadd, to get more info when it happens again.
	

--- ath_if_ath.c begins here ---
Index: sys/dev/ath/if_ath.c
===================================================================
--- sys/dev/ath/if_ath.c	(revision 228191)
+++ sys/dev/ath/if_ath.c	(working copy)
@@ -4111,7 +4111,12 @@
 		}
 
 		ifp->if_ipackets++;
+if (rs->rs_antenna > 2) {
+    device_printf(sc->sc_dev, "rs_antenna=0x%x\n", rs->rs_antenna);
+    ath_printrxbuf(sc, bf, 0, status == HAL_OK);
+} else {
 		sc->sc_stats.ast_ant_rx[rs->rs_antenna]++;
+}
 
 		/*
 		 * Populate the rx status block.  When there are bpf
--- ath_if_ath.c ends here ---


>Release-Note:
>Audit-Trail:
Responsible-Changed-From-To: freebsd-bugs->freebsd-wireless 
Responsible-Changed-By: linimon 
Responsible-Changed-When: Fri Dec 16 09:07:29 UTC 2011 
Responsible-Changed-Why:  
Over to maintainer(s). 

http://www.freebsd.org/cgi/query-pr.cgi?pr=163312 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/163312: commit references a PR
Date: Fri, 23 Dec 2011 02:21:36 +0000 (UTC)

 Author: adrian
 Date: Fri Dec 23 02:21:22 2011
 New Revision: 228829
 URL: http://svn.freebsd.org/changeset/base/228829
 
 Log:
   Add a temporary debugging statement in order to try and identify what's
   going on with the occasional garbage rs_antenna field reported by AR9285
   users.
   
   I've discovered that the 11n NICs only fill out the entire RX status
   descriptor on the final descriptor in an aggregate. Some of the fields
   (notably RSSI) are complete nonsense for A-MPDU subframes. This may
   be another example of this.
   
   The driver doesn't currently toss out statistics for non-final aggregate
   frames. It's likely that this should be done.
   
   If any users hit this particular debugging message they should report it
   immediately to freebsd-wireless@freebsd.org - please ensure you have
   ATH_DEBUG enabled so it prints out the full receive descriptor.
   
   PR:		kern/163312
 
 Modified:
   head/sys/dev/ath/if_ath.c
 
 Modified: head/sys/dev/ath/if_ath.c
 ==============================================================================
 --- head/sys/dev/ath/if_ath.c	Fri Dec 23 02:13:42 2011	(r228828)
 +++ head/sys/dev/ath/if_ath.c	Fri Dec 23 02:21:22 2011	(r228829)
 @@ -4111,6 +4111,35 @@ rx_accept:
  			m->m_pkthdr.len = len;
  		}
  
 +		/*
 +		 * Validate rs->rs_antenna.
 +		 *
 +		 * Some users w/ AR9285 NICs have reported crashes
 +		 * here because rs_antenna field is bogusly large.
 +		 * Let's enforce the maximum antenna limit of 8
 +		 * (and it shouldn't be hard coded, but that's a
 +		 * separate problem) and if there's an issue, print
 +		 * out an error and adjust rs_antenna to something
 +		 * sensible.
 +		 *
 +		 * This code should be removed once the actual
 +		 * root cause of the issue has been identified.
 +		 * For example, it may be that the rs_antenna
 +		 * field is only valid for the lsat frame of
 +		 * an aggregate and it just happens that it is
 +		 * "mostly" right. (This is a general statement -
 +		 * the majority of the statistics are only valid
 +		 * for the last frame in an aggregate.
 +		 */
 +		if (rs->rs_antenna > 7) {
 +			device_printf(sc->sc_dev, "%s: rs_antenna > 7 (%d)\n",
 +			    __func__, rs->rs_antenna);
 +#ifdef	ATH_DEBUG
 +			ath_printrxbuf(sc, bf, 0, status == HAL_OK);
 +#endif /* ATH_DEBUG */
 +			rs->rs_antenna = 0;	/* XXX better than nothing */
 +		}
 +
  		ifp->if_ipackets++;
  		sc->sc_stats.ast_ant_rx[rs->rs_antenna]++;
  
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
State-Changed-From-To: open->patched 
State-Changed-By: adrian 
State-Changed-When: Sun Feb 12 23:23:12 UTC 2012 
State-Changed-Why:  
The patch in the PR should be fine - the RX antenna should be 
validated before being used. 

This should be backported to 9.x and 8.x. Although it _should_ only 
be occurring when in 11n mode (since RX antenna may not be updated 
for non-final RX aggregate frames) it may be occurring elsewhere. 


http://www.freebsd.org/cgi/query-pr.cgi?pr=163312 
>Unformatted:
