From nobody@FreeBSD.org  Mon Sep 13 07:10:09 2010
Return-Path: <nobody@FreeBSD.org>
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 81C391065675
	for <freebsd-gnats-submit@FreeBSD.org>; Mon, 13 Sep 2010 07:10:09 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (www.freebsd.org [IPv6:2001:4f8:fff6::21])
	by mx1.freebsd.org (Postfix) with ESMTP id 6F5FB8FC25
	for <freebsd-gnats-submit@FreeBSD.org>; Mon, 13 Sep 2010 07:10:09 +0000 (UTC)
Received: from www.freebsd.org (localhost [127.0.0.1])
	by www.freebsd.org (8.14.3/8.14.3) with ESMTP id o8D7A91r049444
	for <freebsd-gnats-submit@FreeBSD.org>; Mon, 13 Sep 2010 07:10:09 GMT
	(envelope-from nobody@www.freebsd.org)
Received: (from nobody@localhost)
	by www.freebsd.org (8.14.3/8.14.3/Submit) id o8D7A9s7049428;
	Mon, 13 Sep 2010 07:10:09 GMT
	(envelope-from nobody)
Message-Id: <201009130710.o8D7A9s7049428@www.freebsd.org>
Date: Mon, 13 Sep 2010 07:10:09 GMT
From: David Naylor <naylor.b.david@gmail.com>
To: freebsd-gnats-submit@FreeBSD.org
Subject: [acpi] acpi_ec does not work properly on Lenovo S10[e] (due to dynamic switching to polled mode)
X-Send-Pr-Version: www-3.1
X-GNATS-Notify:

>Number:         150517
>Category:       kern
>Synopsis:       [acpi] acpi_ec does not work properly on Lenovo S10[e] (due to dynamic switching to polled mode)
>Confidential:   no
>Severity:       serious
>Priority:       low
>Responsible:    avg
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Mon Sep 13 07:20:06 UTC 2010
>Closed-Date:    Mon Nov 08 11:26:02 UTC 2010
>Last-Modified:  Mon Nov 08 11:26:02 UTC 2010
>Originator:     David Naylor
>Release:        FreeBSD-current
>Organization:
Private
>Environment:
FreeBSD dragonpico.dg 9.0-CURRENT FreeBSD 9.0-CURRENT #0: Thu Aug  5 16:21:50 SAST 2010     root@dragon.dg:/tmp/home/freebsd9/src/sys/LENOVOS10  i386
>Description:
The Lenovo S10's EC is a bit faulty and regularly resets itself.  This causes FreeBSD's handler to switch to polling mode which does not work the the EC.  

The problem appears to be triggered when commands are given to the EC in rapid succession.  This is too much for the EC to handle and it resets itself.  

This results in battery level to no longer work, and `shutdown -p`.  The console also gets spammed with error messages from ACPI.  

(A work around [and possible solution] is given below)
(This is an issue with all BIOS versions published by Lenovo)
>How-To-Repeat:
Run FreeBSD (generic) on a Lenovo S10[e]
>Fix:
WORK-A-ROUND:
Attached is a patch that works around the problem.  It disables switching to polling mode and forces a time to be waited before issuing more commands to the EC.  In conjunction to the patch I need in /boot/loader.conf:

# ACPI Debug options
debug.acpi.ec.delay="200"
debug.acpi.ec.gpe="1"
debug.acpi.ec.timeout="200"

Although this works for me others have reported failure.  Also the laptop fails to shutdown properly on occasion (actual power off sometimes does not happen).  

SOLUTION:
Looking at the Linux code it appears it detects when the EC reset itself and will reissue the command.  FreeBSD's code does not appear to do that.  

Patch attached with submission follows:

--- acpi_ec.c~	2009-06-17 21:14:48.000000000 +0200
+++ acpi_ec.c	2009-06-19 14:40:02.000000000 +0200
@@ -168,7 +168,7 @@
 #define EC_LOCK_TIMEOUT	1000
 
 /* Default delay in microseconds between each run of the status polling loop. */
-#define EC_POLL_DELAY	5
+#define EC_POLL_DELAY	100
 
 /* Total time in ms spent waiting for a response from EC. */
 #define EC_TIMEOUT	750
@@ -184,13 +184,21 @@
 SYSCTL_DECL(_debug_acpi);
 SYSCTL_NODE(_debug_acpi, OID_AUTO, ec, CTLFLAG_RD, NULL, "EC debugging");
 
-static int	ec_burst_mode;
+static int	ec_burst_mode = FALSE;
 TUNABLE_INT("debug.acpi.ec.burst", &ec_burst_mode);
-SYSCTL_INT(_debug_acpi_ec, OID_AUTO, burst, CTLFLAG_RW, &ec_burst_mode, 0,
+SYSCTL_INT(_debug_acpi_ec, OID_AUTO, burst, CTLFLAG_RW, &ec_burst_mode, FALSE,
     "Enable use of burst mode (faster for nearly all systems)");
-static int	ec_polled_mode;
+static int	ec_delay = 0;
+TUNABLE_INT("debug.acpi.ec.delay", &ec_delay);
+SYSCTL_INT(_debug_acpi_ec, OID_AUTO, delay, CTLFLAG_RW, &ec_delay, 0,
+    "Delay after waiting for responce (GPE and polled mode)");
+static int	ec_gpe_mode = FALSE;
+TUNABLE_INT("debug.acpi.ec.gpe", &ec_gpe_mode);
+SYSCTL_INT(_debug_acpi_ec, OID_AUTO, gpe, CTLFLAG_RW, &ec_gpe_mode, FALSE,
+    "Disable adaptive GPE switching (to polled mode)");
+static int	ec_polled_mode = FALSE;
 TUNABLE_INT("debug.acpi.ec.polled", &ec_polled_mode);
-SYSCTL_INT(_debug_acpi_ec, OID_AUTO, polled, CTLFLAG_RW, &ec_polled_mode, 0,
+SYSCTL_INT(_debug_acpi_ec, OID_AUTO, polled, CTLFLAG_RW, &ec_polled_mode, FALSE,
     "Force use of polled mode (only if interrupt mode doesn't work)");
 static int	ec_timeout = EC_TIMEOUT;
 TUNABLE_INT("debug.acpi.ec.timeout", &ec_timeout);
@@ -794,6 +802,7 @@
     EC_STATUS ec_status;
 
     status = AE_NO_HARDWARE_RESPONSE;
+
     ec_status = EC_GET_CSR(sc);
     if (sc->ec_burstactive && !(ec_status & EC_FLAG_BURST_MODE)) {
 	CTR1(KTR_ACPI, "ec burst disabled in waitevent (%s)", msg);
@@ -810,56 +819,36 @@
 EcWaitEvent(struct acpi_ec_softc *sc, EC_EVENT Event, u_int gen_count)
 {
     ACPI_STATUS	Status;
-    int		count, i, slp_ival;
+    int		count, i, req_ticks, cur_ticks;
 
     ACPI_SERIAL_ASSERT(ec);
     Status = AE_NO_HARDWARE_RESPONSE;
     int need_poll = cold || rebooting || ec_polled_mode || sc->ec_suspending;
-    /*
-     * The main CPU should be much faster than the EC.  So the status should
-     * be "not ready" when we start waiting.  But if the main CPU is really
-     * slow, it's possible we see the current "ready" response.  Since that
-     * can't be distinguished from the previous response in polled mode,
-     * this is a potential issue.  We really should have interrupts enabled
-     * during boot so there is no ambiguity in polled mode.
-     *
-     * If this occurs, we add an additional delay before actually entering
-     * the status checking loop, hopefully to allow the EC to go to work
-     * and produce a non-stale status.
-     */
-    if (need_poll) {
-	static int	once;
-
-	if (EcCheckStatus(sc, "pre-check", Event) == AE_OK) {
-	    if (!once) {
-		device_printf(sc->ec_dev,
-		    "warning: EC done before starting event wait\n");
-		once = 1;
-	    }
-	    AcpiOsStall(10);
-	}
-    }
 
     /* Wait for event by polling or GPE (interrupt). */
     if (need_poll) {
 	count = (ec_timeout * 1000) / EC_POLL_DELAY;
 	if (count == 0)
 	    count = 1;
+
+	/* The EC is slow, give it some time to catch up to us */
+	AcpiOsStall(100);
 	for (i = 0; i < count; i++) {
 	    Status = EcCheckStatus(sc, "poll", Event);
 	    if (Status == AE_OK)
 		break;
 	    AcpiOsStall(EC_POLL_DELAY);
 	}
+
+	if (Status != AE_OK)
+	    device_printf(sc->ec_dev, "wait timed out [polling mode]\n");
     } else {
-	slp_ival = hz / 1000;
-	if (slp_ival != 0) {
-	    count = ec_timeout;
-	} else {
-	    /* hz has less than 1 ms resolution so scale timeout. */
-	    slp_ival = 1;
-	    count = ec_timeout / (1000 / hz);
-	}
+	/* How many ticks should we sleep for (max) */
+	req_ticks = hz < 1000 ? (ec_timeout * hz) / 1000
+	    : (ec_timeout * 1000) / hz;
+	/* Make sure we sleep for at least one tick, from now */
+	cur_ticks = (volatile int)ticks;
+	req_ticks = cur_ticks + (req_ticks ? req_ticks : 1) + 1;
 
 	/*
 	 * Wait for the GPE to signal the status changed, checking the
@@ -867,38 +856,42 @@
 	 * GPE for an event we're not interested in here (i.e., SCI for
 	 * EC query).
 	 */
-	for (i = 0; i < count; i++) {
-	    if (gen_count != sc->ec_gencount) {
-		/*
-		 * Record new generation count.  It's possible the GPE was
-		 * just to notify us that a query is needed and we need to
-		 * wait for a second GPE to signal the completion of the
-		 * event we are actually waiting for.
-		 */
-		gen_count = sc->ec_gencount;
-		Status = EcCheckStatus(sc, "sleep", Event);
-		if (Status == AE_OK)
-		    break;
-	    }
-	    tsleep(&sc->ec_gencount, PZERO, "ecgpe", slp_ival);
+	while ((int)(req_ticks - cur_ticks) > 0) {
+	    /* If we have not received a signal then wait for one */
+	    if (gen_count == sc->ec_gencount)
+		tsleep(&sc->ec_gencount, PZERO, "ecgpe", req_ticks - cur_ticks);
+
+	    /*
+	     * Record new generation count.  It's possible the GPE was
+	     * just to notify us that a query is needed and we need to
+	     * wait for a second GPE to signal the completion of the
+	     * event we are actually waiting for.
+	     */
+	    gen_count = sc->ec_gencount;
+	    Status = EcCheckStatus(sc, "sleep", Event);
+	    if (Status == AE_OK)
+	        break;
+
+	    /* Update current tick (so we always have a consistant value */
+	    cur_ticks = (volatile int)ticks;
 	}
 
 	/*
-	 * We finished waiting for the GPE and it never arrived.  Try to
-	 * read the register once and trust whatever value we got.  This is
-	 * the best we can do at this point.  Then, force polled mode on
-	 * since this system doesn't appear to generate GPEs.
+	 * We finished waiting for the GPE and it never arrived.  The register
+	 * has been read on a timeout so no need to re-read it.  Force polled
+	 * mode on since this system doesn't appear to generate GPEs.
 	 */
 	if (Status != AE_OK) {
-	    Status = EcCheckStatus(sc, "sleep_end", Event);
-	    device_printf(sc->ec_dev,
-		"wait timed out (%sresponse), forcing polled mode\n",
-		Status == AE_OK ? "" : "no ");
-	    ec_polled_mode = TRUE;
+	    device_printf(sc->ec_dev, "wait timed out [GPE mode]%s\n",
+		ec_gpe_mode ? "" : ", forcing polled mode");
+	    ec_polled_mode = TRUE && !ec_gpe_mode;
 	}
     }
     if (Status != AE_OK)
 	    CTR0(KTR_ACPI, "error: ec wait timed out");
+    else if (ec_delay);
+	/* Give the EC a chance to recover from all its hard work!!! */
+	AcpiOsStall(ec_delay);
     return (Status);
 }
 


>Release-Note:
>Audit-Trail:
Responsible-Changed-From-To: freebsd-bugs->freebsd-acpi 
Responsible-Changed-By: linimon 
Responsible-Changed-When: Sun Sep 26 20:53:24 UTC 2010 
Responsible-Changed-Why:  
Over to maintainer(s). 

http://www.freebsd.org/cgi/query-pr.cgi?pr=150517 

From: Andriy Gapon <avg@icyb.net.ua>
To: bug-followup@FreeBSD.org, naylor.b.david@gmail.com
Cc:  
Subject: Re: kern/150517: [acpi] acpi_ec does not work properly on Lenovo
 S10[e] (due to dynamic switching to polled mode)
Date: Mon, 27 Sep 2010 08:53:51 +0300

 Since you already looked at the linux code, could you please post a link to a
 place where the problematic condition is handled there?
 
 P.S. A service like http://lxr.linux.no/ or similar might be convenient.
 
 -- 
 Andriy Gapon

From: David Naylor <naylor.b.david@gmail.com>
To: Andriy Gapon <avg@icyb.net.ua>
Cc: bug-followup@freebsd.org
Subject: Re: kern/150517: [acpi] acpi_ec does not work properly on Lenovo S10[e] (due to dynamic switching to polled mode)
Date: Sat, 2 Oct 2010 08:31:17 +0200

 --nextPart6155206.lNfR8abAcT
 Content-Type: Text/Plain;
   charset="us-ascii"
 Content-Transfer-Encoding: quoted-printable
 
 On Monday 27 September 2010 07:53:51 Andriy Gapon wrote:
 > Since you already looked at the linux code, could you please post a link =
 to
 > a place where the problematic condition is handled there?
 >=20
 > P.S. A service like http://lxr.linux.no/ or similar might be convenient.
 
 Sorry for the slow reply.  Your email was mistaken as spam.  I've told gmai=
 l=20
 to behave. =20
 
 It has been a long time since I looked at the code.  The function in questi=
 on=20
 is http://lxr.linux.no/linux+v2.6.35.7/drivers/acpi/ec.c#L217 with the actu=
 al=20
 check for controller reset at L238. =20
 
 Of note is the delay at L254.  It looks like it is almost the same thing my=
 =20
 patch does, except it waits before whereas my patch waits after. =20
 
 --nextPart6155206.lNfR8abAcT
 Content-Type: application/pgp-signature; name=signature.asc 
 Content-Description: This is a digitally signed message part.
 
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.16 (FreeBSD)
 
 iEYEABECAAYFAkym0bkACgkQUaaFgP9pFrKnnACfbtzDzT316oC4jmBcSAK/aNdc
 HGQAn140T2kFljemg5tQCuXsxDC9/W+u
 =VVrY
 -----END PGP SIGNATURE-----
 
 --nextPart6155206.lNfR8abAcT--

From: Andriy Gapon <avg@icyb.net.ua>
To: David Naylor <naylor.b.david@gmail.com>
Cc: bug-followup@freebsd.org
Subject: Re: kern/150517: [acpi] acpi_ec does not work properly on Lenovo
 S10[e] (due to dynamic switching to polled mode)
Date: Tue, 05 Oct 2010 12:09:02 +0300

 on 02/10/2010 09:31 David Naylor said the following:
 > On Monday 27 September 2010 07:53:51 Andriy Gapon wrote:
 >> Since you already looked at the linux code, could you please post a link to
 >> a place where the problematic condition is handled there?
 >>
 >> P.S. A service like http://lxr.linux.no/ or similar might be convenient.
 > 
 > Sorry for the slow reply.  Your email was mistaken as spam.  I've told gmail 
 > to behave.  
 > 
 > It has been a long time since I looked at the code.  The function in question 
 > is http://lxr.linux.no/linux+v2.6.35.7/drivers/acpi/ec.c#L217 with the actual 
 > check for controller reset at L238.  
 > 
 > Of note is the delay at L254.  It looks like it is almost the same thing my 
 > patch does, except it waits before whereas my patch waits after.  
 
 David,
 
 can you dig up what kind of error messages you were getting from EC before your
 patch?
 
 I also looked at your changes and at what Linux does and came up with some
 changes to make our EC code more robust.  They are significantly based on your
 patch, but also add some additional logic from Linux code.
 
 Can you try the patch?
 http://people.freebsd.org/~avg/acpi_ec.patch
 Thanks a lot!
 
 P.S. I can describe why I didn't include some parts of your changes and what new
 changes I made and "borrowed" from Linux, but later, if you'd like.
 
 -- 
 Andriy Gapon

From: David Naylor <naylor.b.david@gmail.com>
To: Andriy Gapon <avg@icyb.net.ua>
Cc: bug-followup@freebsd.org
Subject: Re: kern/150517: [acpi] acpi_ec does not work properly on Lenovo S10[e] (due to dynamic switching to polled mode)
Date: Tue, 5 Oct 2010 19:54:14 +0200

 --nextPart3785335.Aben0vuXi3
 Content-Type: Text/Plain;
   charset="us-ascii"
 Content-Transfer-Encoding: quoted-printable
 
 On Tuesday 05 October 2010 11:09:02 Andriy Gapon wrote:
 > on 02/10/2010 09:31 David Naylor said the following:
 > > On Monday 27 September 2010 07:53:51 Andriy Gapon wrote:
 > >> Since you already looked at the linux code, could you please post a li=
 nk
 > >> to a place where the problematic condition is handled there?
 > >>=20
 > >> P.S. A service like http://lxr.linux.no/ or similar might be convenien=
 t.
 > >=20
 > > Sorry for the slow reply.  Your email was mistaken as spam.  I've told
 > > gmail to behave.
 > >=20
 > > It has been a long time since I looked at the code.  The function in
 > > question is http://lxr.linux.no/linux+v2.6.35.7/drivers/acpi/ec.c#L217
 > > with the actual check for controller reset at L238.
 > >=20
 > > Of note is the delay at L254.  It looks like it is almost the same thing
 > > my patch does, except it waits before whereas my patch waits after.
 >=20
 > David,
 >=20
 > can you dig up what kind of error messages you were getting from EC before
 > your patch?
 
 I ran a stress test of the EC by doing `while true; do acpiconf -i0; done` =
 and=20
 the error messages that produced are:
 
 acpi_ec0: wait timed out (no response), forcing polled mode
 acpi_ec0: EcRead: failed waiting to get data
 ACPI Exception: AE_NO_HARDWARE_RESPONSE, Returned by Handler for=20
 [EmbeddedControl] (20100915/evregion-588)
 ACPI Error: Method parse/execution failed [\\_SB_.BAT0._STA] (Node=20
 0xc4328520), AE_NO_HARDWARE_RESPONSE (20100915/psparse-633)
 ACPI Error: Method execution failed [\\_SB_.BAT0._STA] (Node 0xc4328520),=20
 AE_NO_HARDWARE_RESPONSE (20100915/uteval-185)
 acpi_ec0: EcRead: failed waiting to get data
 ACPI Exception: AE_NO_HARDWARE_RESPONSE, Returned by Handler for=20
 [EmbeddedControl] (20100915/evregion-588)
 ACPI Error: Method parse/execution failed [\\_SB_.BAT0._STA] (Node=20
 0xc4328520), AE_NO_HARDWARE_RESPONSE (20100915/psparse-633)
 ACPI Error: Method execution failed [\\_SB_.BAT0._STA] (Node 0xc4328520),=20
 AE_NO_HARDWARE_RESPONSE (20100915/uteval-185)
 acpi_ec0: EcRead: failed waiting to get data
 ACPI Exception: AE_NO_HARDWARE_RESPONSE, Returned by Handler for=20
 [EmbeddedControl] (20100915/evregion-588)
 ACPI Error: Method parse/execution failed [\\_SB_.BAT0.UPBS] (Node=20
 0xc43284a0), AE_NO_HARDWARE_RESPONSE (20100915/psparse-633)
 ACPI Error: Method parse/execution failed [\\_SB_.BAT0._BST] (Node=20
 0xc43284e0), AE_NO_HARDWARE_RESPONSE (20100915/psparse-633)
 acpi_ec0: EcRead: failed waiting to get data
 ACPI Exception: AE_NO_HARDWARE_RESPONSE, Returned by Handler for=20
 [EmbeddedControl] (20100915/evregion-588)
 ACPI Error: Method parse/execution failed [\\_SB_.BAT0._STA] (Node=20
 0xc4328520), AE_NO_HARDWARE_RESPONSE (20100915/psparse-633)
 ACPI Error: Method execution failed [\\_SB_.BAT0._STA] (Node 0xc4328520),=20
 AE_NO_HARDWARE_RESPONSE (20100915/uteval-185)
 acpi_ec0: EcRead: failed waiting to get data
 ACPI Exception: AE_NO_HARDWARE_RESPONSE, Returned by Handler for=20
 [EmbeddedControl] (20100915/evregion-588)
 ACPI Error: Method parse/execution failed [\\_SB_.BAT0._STA] (Node=20
 0xc4328520), AE_NO_HARDWARE_RESPONSE (20100915/psparse-633)
 ACPI Error: Method execution failed [\\_SB_.BAT0._STA] (Node 0xc4328520),=20
 AE_NO_HARDWARE_RESPONSE (20100915/uteval-185)
 acpi_ec0: EcRead: failed waiting to get data
 ACPI Exception: AE_NO_HARDWARE_RESPONSE, Returned by Handler for=20
 [EmbeddedControl] (20100915/evregion-588)
 ACPI Error: Method parse/execution failed [\\_SB_.BAT0._BST] (Node=20
 0xc43284e0), AE_NO_HARDWARE_RESPONSE (20100915/psparse-633)
 acpi_ec0: EcRead: failed waiting to get data
 ACPI Exception: AE_NO_HARDWARE_RESPONSE, Returned by Handler for=20
 [EmbeddedControl] (20100915/evregion-588)
 ACPI Error: Method parse/execution failed [\\_TZ_.TZ00._TMP] (Node=20
 0xc4328b80), AE_NO_HARDWARE_RESPONSE (20100915/psparse-633)
 
 The messages are not always consistent; sometimes they occur during boot. =
 =20
 Also sometimes the EC fails to powerdown the computer. =20
 
 > I also looked at your changes and at what Linux does and came up with some
 > changes to make our EC code more robust.  They are significantly based on
 > your patch, but also add some additional logic from Linux code.
 >=20
 > Can you try the patch?
 > http://people.freebsd.org/~avg/acpi_ec.patch
 > Thanks a lot!
 
 Two notes about your patch:
  - EcCheckStatus has changed position in the file resulting in a larger tha=
 n=20
 required change. =20
  - You no longer dynamically switch to polled mode.  Was that intentional?
 
 Your patchs works.  No errors were reported during the stress test, however=
 =20
 running acpiconf takes a noticeably longer time to complete (before and wit=
 h=20
 my patch it was instantaneous).  Setting debug.acpi.ec.timeout=3D25 improve=
 s=20
 responsiveness (reducing to 5 resulting in GPE query failed messages). =20
 
 Accoring to time acpiconf takes upto 3 seconds to complete, after setting=20
 debug.acpi.ec.timeout it takes upto 0.24 seconds. =20
 
 I changed EC_POLL_DELAY back to 5 and that didn't change anything. =20
 
 > P.S. I can describe why I didn't include some parts of your changes and
 > what new changes I made and "borrowed" from Linux, but later, if you'd
 > like.
 
 --nextPart3785335.Aben0vuXi3
 Content-Type: application/pgp-signature; name=signature.asc 
 Content-Description: This is a digitally signed message part.
 
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.16 (FreeBSD)
 
 iEYEABECAAYFAkyrZkoACgkQUaaFgP9pFrJNvgCfTh+WLv+e8iUj45svPg39is4L
 EdgAoI72O51NeYgANB03UpwXC70L9gFg
 =nPj/
 -----END PGP SIGNATURE-----
 
 --nextPart3785335.Aben0vuXi3--

From: Andriy Gapon <avg@icyb.net.ua>
To: David Naylor <naylor.b.david@gmail.com>
Cc: bug-followup@freebsd.org
Subject: Re: kern/150517: [acpi] acpi_ec does not work properly on Lenovo
 S10[e] (due to dynamic switching to polled mode)
Date: Tue, 05 Oct 2010 21:28:18 +0300

 on 05/10/2010 20:54 David Naylor said the following:
 > On Tuesday 05 October 2010 11:09:02 Andriy Gapon wrote:
 >> Can you try the patch?
 >> http://people.freebsd.org/~avg/acpi_ec.patch
 >> Thanks a lot!
 > 
 > Two notes about your patch:
 >  - EcCheckStatus has changed position in the file resulting in a larger than 
 > required change.
 
 Yes, it's now needed in a function that is defined earlier.
 I could just have added a declaration for EcCheckStatus(), but for some reason I
 decided to move its definition.
 
 >  - You no longer dynamically switch to polled mode.  Was that intentional?
 
 Yes.  My opinion is that it should be up to user to forcefully switch to polled
 mode.  Although perhaps this is an unwelcome change for some users.  Need to
 weight pros and cons.
 
 > Your patchs works.  No errors were reported during the stress test, however 
 > running acpiconf takes a noticeably longer time to complete (before and with 
 > my patch it was instantaneous).  Setting debug.acpi.ec.timeout=25 improves 
 > responsiveness (reducing to 5 resulting in GPE query failed messages).  
 > 
 > Accoring to time acpiconf takes upto 3 seconds to complete, after setting 
 > debug.acpi.ec.timeout it takes upto 0.24 seconds.  
 > 
 > I changed EC_POLL_DELAY back to 5 and that didn't change anything.  
 
 I will investigate this.
 Thank you!
 
 -- 
 Andriy Gapon

From: David Naylor <naylor.b.david@gmail.com>
To: Andriy Gapon <avg@icyb.net.ua>
Cc: bug-followup@freebsd.org
Subject: Re: kern/150517: [acpi] acpi_ec does not work properly on Lenovo S10[e] (due to dynamic switching to polled mode)
Date: Tue, 5 Oct 2010 21:01:35 +0200

 --nextPart3774253.k86G1yU3gG
 Content-Type: Text/Plain;
   charset="us-ascii"
 Content-Transfer-Encoding: quoted-printable
 
 On Tuesday 05 October 2010 20:28:18 Andriy Gapon wrote:
 > on 05/10/2010 20:54 David Naylor said the following:
 > > On Tuesday 05 October 2010 11:09:02 Andriy Gapon wrote:
 > >> Can you try the patch?
 > >> http://people.freebsd.org/~avg/acpi_ec.patch
 > >> Thanks a lot!
 > >=20
 > > Two notes about your patch:
 > >  - EcCheckStatus has changed position in the file resulting in a larger
 > >  than
 > >=20
 > > required change.
 >=20
 > Yes, it's now needed in a function that is defined earlier.
 > I could just have added a declaration for EcCheckStatus(), but for some
 > reason I decided to move its definition.
 >=20
 > >  - You no longer dynamically switch to polled mode.  Was that
 > >  intentional?
 >=20
 > Yes.  My opinion is that it should be up to user to forcefully switch to
 > polled mode.  Although perhaps this is an unwelcome change for some users.
 >  Need to weight pros and cons.
 >=20
 > > Your patchs works.  No errors were reported during the stress test,
 > > however running acpiconf takes a noticeably longer time to complete
 > > (before and with my patch it was instantaneous).  Setting
 > > debug.acpi.ec.timeout=3D25 improves responsiveness (reducing to 5
 > > resulting in GPE query failed messages).
 > >=20
 > > Accoring to time acpiconf takes upto 3 seconds to complete, after setti=
 ng
 > > debug.acpi.ec.timeout it takes upto 0.24 seconds.
 > >=20
 > > I changed EC_POLL_DELAY back to 5 and that didn't change anything.
 >=20
 > I will investigate this.
 > Thank you!
 
 Thanks, I've tried but have not been able to isolate the cause.  Polled mod=
 e=20
 works perfectly.  Something my patch was unable to do. =20
 
 --nextPart3774253.k86G1yU3gG
 Content-Type: application/pgp-signature; name=signature.asc 
 Content-Description: This is a digitally signed message part.
 
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.16 (FreeBSD)
 
 iEYEABECAAYFAkyrdhMACgkQUaaFgP9pFrLHiQCeM+1kVlq4poGSZ3rdEdornCuI
 PrMAn21LNhbMKbSgo+g3fBoVQa5YE9Ft
 =PBE8
 -----END PGP SIGNATURE-----
 
 --nextPart3774253.k86G1yU3gG--

From: Andriy Gapon <avg@icyb.net.ua>
To: David Naylor <naylor.b.david@gmail.com>
Cc: bug-followup@freebsd.org
Subject: Re: kern/150517: [acpi] acpi_ec does not work properly on Lenovo
 S10[e] (due to dynamic switching to polled mode)
Date: Wed, 06 Oct 2010 14:24:46 +0300

 on 05/10/2010 22:01 David Naylor said the following:
 > Thanks, I've tried but have not been able to isolate the cause.  Polled mode 
 > works perfectly.  Something my patch was unable to do.  
 
 David,
 
 when you see those long delays, do you have polling enabled or disabled?
 Also, before my patch, did your system use to fallback to polled mode automatically?
 
 Thanks!
 -- 
 Andriy Gapon
Responsible-Changed-From-To: freebsd-acpi->avg 
Responsible-Changed-By: avg 
Responsible-Changed-When: Mon Oct 11 11:44:50 UTC 2010 
Responsible-Changed-Why:  
It seems that I am working on this. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=150517 

From: Andriy Gapon <avg@icyb.net.ua>
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/150517: [acpi] acpi_ec does not work properly on Lenovo
 S10[e] (due to dynamic switching to polled mode)
Date: Tue, 12 Oct 2010 15:32:08 +0300

 The latest patch, just for the record:
 http://people.freebsd.org/~avg/acpi_ec.2.patch
 
 -- 
 Andriy Gapon

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/150517: commit references a PR
Date: Tue, 12 Oct 2010 17:53:10 +0000 (UTC)

 Author: avg
 Date: Tue Oct 12 17:53:01 2010
 New Revision: 213737
 URL: http://svn.freebsd.org/changeset/base/213737
 
 Log:
   acpi_ec: changes in communication with hardware
   
   Short description of the changes:
   - attempt to retry some commands for which it is possible (read, query)
   - always make a short sleep before checking EC status in polled mode
   - periodically poll EC status in interrupt mode
   - change logic for detecting broken interrupt delivery and falling back
     to polled mode
   - check that EC is ready for input before starting a new command, wait
     if necessary
   
   This commit is based on the original patch by David Naylor.
   
   PR:		kern/150517
   Submitted by:	David Naylor <naylor.b.david@gmail.com>
   Reviewed by:	jkim
   MFC after:	3 weeks
 
 Modified:
   head/sys/dev/acpica/acpi_ec.c
 
 Modified: head/sys/dev/acpica/acpi_ec.c
 ==============================================================================
 --- head/sys/dev/acpica/acpi_ec.c	Tue Oct 12 17:40:45 2010	(r213736)
 +++ head/sys/dev/acpica/acpi_ec.c	Tue Oct 12 17:53:01 2010	(r213737)
 @@ -153,7 +153,7 @@ struct acpi_ec_softc {
      int			ec_glkhandle;
      int			ec_burstactive;
      int			ec_sci_pend;
 -    u_int		ec_gencount;
 +    volatile u_int	ec_gencount;
      int			ec_suspending;
  };
  
 @@ -165,7 +165,7 @@ struct acpi_ec_softc {
  #define EC_LOCK_TIMEOUT	1000
  
  /* Default delay in microseconds between each run of the status polling loop. */
 -#define EC_POLL_DELAY	5
 +#define EC_POLL_DELAY	50
  
  /* Total time in ms spent waiting for a response from EC. */
  #define EC_TIMEOUT	750
 @@ -599,12 +599,32 @@ acpi_ec_write_method(device_t dev, u_int
      return (0);
  }
  
 +static ACPI_STATUS
 +EcCheckStatus(struct acpi_ec_softc *sc, const char *msg, EC_EVENT event)
 +{
 +    ACPI_STATUS status;
 +    EC_STATUS ec_status;
 +
 +    status = AE_NO_HARDWARE_RESPONSE;
 +    ec_status = EC_GET_CSR(sc);
 +    if (sc->ec_burstactive && !(ec_status & EC_FLAG_BURST_MODE)) {
 +	CTR1(KTR_ACPI, "ec burst disabled in waitevent (%s)", msg);
 +	sc->ec_burstactive = FALSE;
 +    }
 +    if (EVENT_READY(event, ec_status)) {
 +	CTR2(KTR_ACPI, "ec %s wait ready, status %#x", msg, ec_status);
 +	status = AE_OK;
 +    }
 +    return (status);
 +}
 +
  static void
  EcGpeQueryHandler(void *Context)
  {
      struct acpi_ec_softc	*sc = (struct acpi_ec_softc *)Context;
      UINT8			Data;
      ACPI_STATUS			Status;
 +    int				retry;
      char			qxx[5];
  
      ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);
 @@ -625,7 +645,16 @@ EcGpeQueryHandler(void *Context)
       * that may arise from running the query from causing another query
       * to be queued, we clear the pending flag only after running it.
       */
 -    Status = EcCommand(sc, EC_COMMAND_QUERY);
 +    for (retry = 0; retry < 2; retry++) {
 +	Status = EcCommand(sc, EC_COMMAND_QUERY);
 +	if (ACPI_SUCCESS(Status))
 +	    break;
 +	if (EcCheckStatus(sc, "retr_check",
 +	    EC_EVENT_INPUT_BUFFER_EMPTY) == AE_OK)
 +	    continue;
 +	else
 +	    break;
 +    }
      sc->ec_sci_pend = FALSE;
      if (ACPI_FAILURE(Status)) {
  	EcUnlock(sc);
 @@ -678,7 +707,7 @@ EcGpeHandler(void *Context)
       * address and then data values.)
       */
      atomic_add_int(&sc->ec_gencount, 1);
 -    wakeup(&sc->ec_gencount);
 +    wakeup(&sc);
  
      /*
       * If the EC_SCI bit of the status register is set, queue a query handler.
 @@ -789,68 +818,27 @@ EcSpaceHandler(UINT32 Function, ACPI_PHY
  }
  
  static ACPI_STATUS
 -EcCheckStatus(struct acpi_ec_softc *sc, const char *msg, EC_EVENT event)
 -{
 -    ACPI_STATUS status;
 -    EC_STATUS ec_status;
 -
 -    status = AE_NO_HARDWARE_RESPONSE;
 -    ec_status = EC_GET_CSR(sc);
 -    if (sc->ec_burstactive && !(ec_status & EC_FLAG_BURST_MODE)) {
 -	CTR1(KTR_ACPI, "ec burst disabled in waitevent (%s)", msg);
 -	sc->ec_burstactive = FALSE;
 -    }
 -    if (EVENT_READY(event, ec_status)) {
 -	CTR2(KTR_ACPI, "ec %s wait ready, status %#x", msg, ec_status);
 -	status = AE_OK;
 -    }
 -    return (status);
 -}
 -
 -static ACPI_STATUS
  EcWaitEvent(struct acpi_ec_softc *sc, EC_EVENT Event, u_int gen_count)
  {
 +    static int	no_intr = 0;
      ACPI_STATUS	Status;
 -    int		count, i, slp_ival;
 +    int		count, i, need_poll, slp_ival;
  
      ACPI_SERIAL_ASSERT(ec);
      Status = AE_NO_HARDWARE_RESPONSE;
 -    int need_poll = cold || rebooting || ec_polled_mode || sc->ec_suspending;
 -    /*
 -     * The main CPU should be much faster than the EC.  So the status should
 -     * be "not ready" when we start waiting.  But if the main CPU is really
 -     * slow, it's possible we see the current "ready" response.  Since that
 -     * can't be distinguished from the previous response in polled mode,
 -     * this is a potential issue.  We really should have interrupts enabled
 -     * during boot so there is no ambiguity in polled mode.
 -     *
 -     * If this occurs, we add an additional delay before actually entering
 -     * the status checking loop, hopefully to allow the EC to go to work
 -     * and produce a non-stale status.
 -     */
 -    if (need_poll) {
 -	static int	once;
 -
 -	if (EcCheckStatus(sc, "pre-check", Event) == AE_OK) {
 -	    if (!once) {
 -		device_printf(sc->ec_dev,
 -		    "warning: EC done before starting event wait\n");
 -		once = 1;
 -	    }
 -	    AcpiOsStall(10);
 -	}
 -    }
 +    need_poll = cold || rebooting || ec_polled_mode || sc->ec_suspending;
  
      /* Wait for event by polling or GPE (interrupt). */
      if (need_poll) {
  	count = (ec_timeout * 1000) / EC_POLL_DELAY;
  	if (count == 0)
  	    count = 1;
 +	DELAY(10);
  	for (i = 0; i < count; i++) {
  	    Status = EcCheckStatus(sc, "poll", Event);
  	    if (Status == AE_OK)
  		break;
 -	    AcpiOsStall(EC_POLL_DELAY);
 +	    DELAY(EC_POLL_DELAY);
  	}
      } else {
  	slp_ival = hz / 1000;
 @@ -869,34 +857,37 @@ EcWaitEvent(struct acpi_ec_softc *sc, EC
  	 * EC query).
  	 */
  	for (i = 0; i < count; i++) {
 -	    if (gen_count != sc->ec_gencount) {
 -		/*
 -		 * Record new generation count.  It's possible the GPE was
 -		 * just to notify us that a query is needed and we need to
 -		 * wait for a second GPE to signal the completion of the
 -		 * event we are actually waiting for.
 -		 */
 -		gen_count = sc->ec_gencount;
 -		Status = EcCheckStatus(sc, "sleep", Event);
 -		if (Status == AE_OK)
 -		    break;
 +	    if (gen_count == sc->ec_gencount)
 +		tsleep(&sc, 0, "ecgpe", slp_ival);
 +	    /*
 +	     * Record new generation count.  It's possible the GPE was
 +	     * just to notify us that a query is needed and we need to
 +	     * wait for a second GPE to signal the completion of the
 +	     * event we are actually waiting for.
 +	     */
 +	    Status = EcCheckStatus(sc, "sleep", Event);
 +	    if (Status == AE_OK) {
 +		if (gen_count == sc->ec_gencount)
 +		    no_intr++;
 +		else
 +		    no_intr = 0;
 +		break;
  	    }
 -	    tsleep(&sc->ec_gencount, PZERO, "ecgpe", slp_ival);
 +	    gen_count = sc->ec_gencount;
  	}
  
  	/*
  	 * We finished waiting for the GPE and it never arrived.  Try to
  	 * read the register once and trust whatever value we got.  This is
 -	 * the best we can do at this point.  Then, force polled mode on
 -	 * since this system doesn't appear to generate GPEs.
 +	 * the best we can do at this point.
  	 */
 -	if (Status != AE_OK) {
 +	if (Status != AE_OK)
  	    Status = EcCheckStatus(sc, "sleep_end", Event);
 -	    device_printf(sc->ec_dev,
 -		"wait timed out (%sresponse), forcing polled mode\n",
 -		Status == AE_OK ? "" : "no ");
 -	    ec_polled_mode = TRUE;
 -	}
 +    }
 +    if (!need_poll && no_intr > 10) {
 +	device_printf(sc->ec_dev,
 +	    "not getting interrupts, switched to polled mode\n");
 +	ec_polled_mode = 1;
      }
      if (Status != AE_OK)
  	    CTR0(KTR_ACPI, "error: ec wait timed out");
 @@ -933,6 +924,14 @@ EcCommand(struct acpi_ec_softc *sc, EC_C
  	return (AE_BAD_PARAMETER);
      }
  
 +    /*
 +     * Ensure empty input buffer before issuing command.
 +     * Use generation count of zero to force a quick check.
 +     */
 +    status = EcWaitEvent(sc, EC_EVENT_INPUT_BUFFER_EMPTY, 0);
 +    if (ACPI_FAILURE(status))
 +	return (status);
 +
      /* Run the command and wait for the chosen event. */
      CTR1(KTR_ACPI, "ec running command %#x", cmd);
      gen_count = sc->ec_gencount;
 @@ -955,24 +954,31 @@ EcRead(struct acpi_ec_softc *sc, UINT8 A
  {
      ACPI_STATUS	status;
      u_int gen_count;
 +    int retry;
  
      ACPI_SERIAL_ASSERT(ec);
      CTR1(KTR_ACPI, "ec read from %#x", Address);
  
 -    status = EcCommand(sc, EC_COMMAND_READ);
 -    if (ACPI_FAILURE(status))
 -	return (status);
 +    for (retry = 0; retry < 2; retry++) {
 +	status = EcCommand(sc, EC_COMMAND_READ);
 +	if (ACPI_FAILURE(status))
 +	    return (status);
  
 -    gen_count = sc->ec_gencount;
 -    EC_SET_DATA(sc, Address);
 -    status = EcWaitEvent(sc, EC_EVENT_OUTPUT_BUFFER_FULL, gen_count);
 -    if (ACPI_FAILURE(status)) {
 -	device_printf(sc->ec_dev, "EcRead: failed waiting to get data\n");
 -	return (status);
 +	gen_count = sc->ec_gencount;
 +	EC_SET_DATA(sc, Address);
 +	status = EcWaitEvent(sc, EC_EVENT_OUTPUT_BUFFER_FULL, gen_count);
 +	if (ACPI_FAILURE(status)) {
 +	    if (EcCheckStatus(sc, "retr_check",
 +		EC_EVENT_INPUT_BUFFER_EMPTY) == AE_OK)
 +		continue;
 +	    else
 +		break;
 +	}
 +	*Data = EC_GET_DATA(sc);
 +	return (AE_OK);
      }
 -    *Data = EC_GET_DATA(sc);
 -
 -    return (AE_OK);
 +    device_printf(sc->ec_dev, "EcRead: failed waiting to get data\n");
 +    return (status);
  }
  
  static ACPI_STATUS
 @@ -992,7 +998,7 @@ EcWrite(struct acpi_ec_softc *sc, UINT8 
      EC_SET_DATA(sc, Address);
      status = EcWaitEvent(sc, EC_EVENT_INPUT_BUFFER_EMPTY, gen_count);
      if (ACPI_FAILURE(status)) {
 -	device_printf(sc->ec_dev, "EcRead: failed waiting for sent address\n");
 +	device_printf(sc->ec_dev, "EcWrite: failed waiting for sent address\n");
  	return (status);
      }
  
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
State-Changed-From-To: open->patched 
State-Changed-By: avg 
State-Changed-When: Mon Oct 18 12:09:04 UTC 2010 
State-Changed-Why:  
The patch is committed in head. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=150517 
State-Changed-From-To: patched->closed 
State-Changed-By: avg 
State-Changed-When: Mon Nov 8 11:25:37 UTC 2010 
State-Changed-Why:  
The fix is MFC-ed to stable/8. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=150517 
>Unformatted:
