From nobody@FreeBSD.org  Sat Jan 12 08:22:12 2008
Return-Path: <nobody@FreeBSD.org>
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 50EEF16A41B
	for <freebsd-gnats-submit@FreeBSD.org>; Sat, 12 Jan 2008 08:22:12 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (www.freebsd.org [IPv6:2001:4f8:fff6::21])
	by mx1.freebsd.org (Postfix) with ESMTP id 431C313C455
	for <freebsd-gnats-submit@FreeBSD.org>; Sat, 12 Jan 2008 08:22:12 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (localhost [127.0.0.1])
	by www.freebsd.org (8.14.2/8.14.2) with ESMTP id m0C8L1KY093326
	for <freebsd-gnats-submit@FreeBSD.org>; Sat, 12 Jan 2008 08:21:01 GMT
	(envelope-from nobody@www.freebsd.org)
Received: (from nobody@localhost)
	by www.freebsd.org (8.14.2/8.14.1/Submit) id m0C8L1Ut093325;
	Sat, 12 Jan 2008 08:21:01 GMT
	(envelope-from nobody)
Message-Id: <200801120821.m0C8L1Ut093325@www.freebsd.org>
Date: Sat, 12 Jan 2008 08:21:01 GMT
From: KOIE Hidetaka <koie@suri.co.jp>
To: freebsd-gnats-submit@FreeBSD.org
Subject: default parameter of powerd is unsuitable for multi core system.
X-Send-Pr-Version: www-3.1
X-GNATS-Notify:

>Number:         119589
>Category:       bin
>Synopsis:       [patch] powerd(8): default parameter of powerd is unsuitable for multi core system.
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Sat Jan 12 08:30:01 UTC 2008
>Closed-Date:    Tue Nov 18 15:58:02 UTC 2008
>Last-Modified:  Tue Nov 18 15:58:02 UTC 2008
>Originator:     KOIE Hidetaka
>Release:        FreeBSD 8.0-CURRENT amd64
>Organization:
surigiken
>Environment:
FreeBSD guriandgura 8.0-CURRENT FreeBSD 8.0-CURRENT #0: Sat Jan 5 16:35:
45 JST 2008 koie@guriandgura:/usr/obj/usr/src/sys/GURIANDGURA amd64

>Description:
On multi core system, powerd will not increase performance.
For example, if NCPU=4 and one process consumes a CPU,
system-wide CPU idle is >= 75%.
Because the threashold is 65% by default,
powerd never increase performace.

>How-To-Repeat:

>Fix:
Change default setting:
DEFAULT_ACTIVE_PERCENT = (100 * (NCPU - 1) + 65) / NCPU,
DEFAULT_IDLE_PERCENT = (100 * (NCPU - 1) + 90) / NCPU.
if NCPU=4, 
DEFAULT_ACTIVE_PERCENT = 91,
DEFAULT_IDLE_PERCENT = 97.

Or, powerd polls per-core CPU statistics kern.cp_times
and increases performance when any CPU is busy.

Or, kernel and powerd support per-core power management.


>Release-Note:
>Audit-Trail:
Responsible-Changed-From-To: freebsd-ports-bugs->freebsd-bugs 
Responsible-Changed-By: edwin 
Responsible-Changed-When: Sat Jan 12 10:03:18 UTC 2008 
Responsible-Changed-Why:  
This lives in src/usr.sbin/powerd 

http://www.freebsd.org/cgi/query-pr.cgi?pr=119589 

From: KOIE Hidetaka <koie@suri.co.jp>
To: bug-followup@FreeBSD.org, koie@suri.co.jp
Cc:  
Subject: Re: bin/119589: powerd(8): default parameter of powerd is
 unsuitable for multi core system.
Date: Sat, 19 Jan 2008 03:37:24 +0900 (JST)

 I write a patch:
 - If a cpu is busy, increase freq.
 - If all cpu is idle, decrease freq.
 
 --- powerd.c.orig	2008-01-15 12:13:03.130694451 +0900
 +++ powerd.c	2008-01-19 03:21:27.279583449 +0900
 @@ -50,6 +50,8 @@ __FBSDID("$FreeBSD: src/usr.sbin/powerd/
  #include <machine/apm_bios.h>
  #endif
  
 +#define MAX_CPU 32	/*XXX*/
 +
  #define DEFAULT_ACTIVE_PERCENT	65
  #define DEFAULT_IDLE_PERCENT	90
  #define DEFAULT_POLL_INTERVAL	500	/* Poll interval in milliseconds */
 @@ -77,7 +79,7 @@ const char *modes[] = {
  #define DEVDPIPE	"/var/run/devd.pipe"
  #define DEVCTL_MAXBUF	1024
  
 -static int	read_usage_times(long *idle, long *total);
 +static int	read_usage_times(long idle[MAX_CPU], long total[MAX_CPU]);
  static int	read_freqs(int *numfreqs, int **freqs, int **power);
  static int	set_freq(int freq);
  static void	acline_init(void);
 @@ -89,7 +91,8 @@ static void	parse_mode(char *arg, int *m
  static void	usage(void);
  
  /* Sysctl data structures. */
 -static int	cp_time_mib[2];
 +static int	numcpus;
 +static int	cp_times_mib[2];
  static int	freq_mib[4];
  static int	levels_mib[4];
  static int	acline_mib[3];
 @@ -119,27 +122,29 @@ static int	devd_pipe = -1;
  static struct timeval tried_devd;
  
  static int
 -read_usage_times(long *idle, long *total)
 +read_usage_times(long idle[MAX_CPU], long total[MAX_CPU])
  {
 -	static long idle_old, total_old;
 -	long cp_time[CPUSTATES], i, total_new;
 -	size_t cp_time_len;
 +	static long idle_old[MAX_CPU], total_old[MAX_CPU];
 +	int k;
 +	long cp_times[MAX_CPU][CPUSTATES];
 +	size_t cp_times_len;
  	int error;
  
 -	cp_time_len = sizeof(cp_time);
 -	error = sysctl(cp_time_mib, 2, cp_time, &cp_time_len, NULL, 0);
 +	cp_times_len = sizeof(cp_times);
 +	error = sysctl(cp_times_mib, 2, cp_times, &cp_times_len, NULL, 0);
  	if (error)
  		return (error);
 -	for (total_new = 0, i = 0; i < CPUSTATES; i++)
 -		total_new += cp_time[i];
 -
 -	if (idle)
 -		*idle = cp_time[CP_IDLE] - idle_old;
 -	if (total)
 -		*total = total_new - total_old;
 -
 -	idle_old = cp_time[CP_IDLE];
 -	total_old = total_new;
 +	for (k = 0; k < numcpus; k++) {
 +		double total_new = 0;
 +		int i;
 +		for (i = 0; i < CPUSTATES; i++)
 +			total_new += cp_times[k][i];
 +
 +		idle[k] = cp_times[k][CP_IDLE] - idle_old[k];
 +		idle_old[k] = cp_times[k][CP_IDLE];
 +		total[k] = total_new - total_old[k];
 +		total_old[k] = total_new;
 +	}
  
  	return (0);
  }
 @@ -369,11 +374,13 @@ main(int argc, char * argv[])
  	int nfds;
  	struct pidfh *pfh = NULL;
  	const char *pidfile = NULL;
 -	long idle, total;
 +	long idle[MAX_CPU], total[MAX_CPU];
 +	int k;
  	int curfreq, *freqs, i, *mwatts, numfreqs;
  	int ch, mode, mode_ac, mode_battery, mode_none;
  	uint64_t mjoules_used;
  	size_t len;
 +	int bumpup, dropdown;
  
  	/* Default mode for all AC states is adaptive. */
  	mode_ac = mode_battery = mode_none = MODE_ADAPTIVE;
 @@ -425,7 +432,7 @@ main(int argc, char * argv[])
  			}
  			break;
  		case 'v':
 -			vflag = 1;
 +			vflag++;
  			break;
  		default:
  			usage();
 @@ -436,10 +443,19 @@ main(int argc, char * argv[])
  	/* Poll interval is in units of ms. */
  	poll_ival *= 1000;
  
 +	/* How many CPU? */
 +	len = sizeof(numcpus);
 +	if (sysctlbyname("hw.ncpu", &numcpus, &len, NULL, 0))
 +		err(1, "sysctl hw.ncpu");
 +	if (vflag)
 +		printf("ncpu=%d\n", numcpus);
 +	if (numcpus > MAX_CPU)
 +		errc(1, 0, "too many CPU are loaded");
 +
  	/* Look up various sysctl MIBs. */
  	len = 2;
 -	if (sysctlnametomib("kern.cp_time", cp_time_mib, &len))
 -		err(1, "lookup kern.cp_time");
 +	if (sysctlnametomib("kern.cp_times", cp_times_mib, &len))
 +		err(1, "lookup kern.cp_times");
  	len = 4;
  	if (sysctlnametomib("dev.cpu.0.freq", freq_mib, &len))
  		err(1, "lookup freq");
 @@ -448,7 +464,7 @@ main(int argc, char * argv[])
  		err(1, "lookup freq_levels");
  
  	/* Check if we can read the idle time and supported freqs. */
 -	if (read_usage_times(NULL, NULL))
 +	if (read_usage_times(idle, total))
  		err(1, "read_usage_times");
  	if (read_freqs(&numfreqs, &freqs, &mwatts))
  		err(1, "error reading supported CPU frequencies");
 @@ -578,11 +594,18 @@ main(int argc, char * argv[])
  		}
  
  		/* Adaptive mode; get the current CPU usage times. */
 -		if (read_usage_times(&idle, &total)) {
 +		if (read_usage_times(idle, total)) {
  			if (vflag)
  				warn("read_usage_times() failed");
  			continue;
  		}
 +#if 0
 +		printf("IDLE%%:");
 +		for (i = 0; i < numcpus; i++) {
 +			printf(" %3ld",  100 * idle[i] / total[i]);
 +		}
 +		printf("\n");
 +#endif
  
  		/*
  		 * If we're idle less than the active mark, bump up two levels.
 @@ -592,8 +615,26 @@ main(int argc, char * argv[])
  			if (freqs[i] == curfreq)
  				break;
  		}
 -		if (idle < (total * cpu_running_mark) / 100 &&
 -		    curfreq < freqs[0]) {
 +		bumpup = 0;
 +		if (curfreq < freqs[0]) {
 +			for (k = 0; k < numcpus; k++) {
 +				if (idle[k] * 100 < total[k] * cpu_running_mark) {
 +					bumpup = 1;
 +					break;
 +				}
 +			}
 +		}
 +		dropdown = 0;
 +		if (curfreq > freqs[numfreqs - 1]) {
 +			dropdown = 1;
 +			for (k = 0; k < numcpus; k++) {
 +				if (idle[k] * 100 <= total[k] * cpu_idle_mark) {
 +					dropdown = 0;
 +					break;
 +				}
 +			}
 +		}
 +		if (bumpup) {
  			i -= 2;
  			if (i < 0)
  				i = 0;
 @@ -605,8 +646,7 @@ main(int argc, char * argv[])
  			if (set_freq(freqs[i]))
  				warn("error setting CPU frequency %d",
  				    freqs[i]);
 -		} else if (idle > (total * cpu_idle_mark) / 100 &&
 -		    curfreq > freqs[numfreqs - 1]) {
 +		} else if (dropdown) {
  			i++;
  			if (vflag) {
  				printf("idle time > %d%%, decreasing clock"
State-Changed-From-To: open->closed 
State-Changed-By: mav 
State-Changed-When: Tue Nov 18 15:57:21 UTC 2008 
State-Changed-Why:  
Fixed in -CURRENT by r185050. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=119589 
>Unformatted:
