From dwhite@gumbysoft.com  Wed Nov  5 14:37:40 2003
Return-Path: <dwhite@gumbysoft.com>
Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125])
	by hub.freebsd.org (Postfix) with ESMTP id 5398B16A4CE
	for <FreeBSD-gnats-submit@freebsd.org>; Wed,  5 Nov 2003 14:37:40 -0800 (PST)
Received: from carver.gumbysoft.com (carver.gumbysoft.com [66.220.23.50])
	by mx1.FreeBSD.org (Postfix) with ESMTP id 6BEA043FDD
	for <FreeBSD-gnats-submit@freebsd.org>; Wed,  5 Nov 2003 14:37:39 -0800 (PST)
	(envelope-from dwhite@gumbysoft.com)
Received: by carver.gumbysoft.com (Postfix, from userid 1000)
	id 5D74272DB8; Wed,  5 Nov 2003 14:37:39 -0800 (PST)
Message-Id: <20031105223739.5D74272DB8@carver.gumbysoft.com>
Date: Wed,  5 Nov 2003 14:37:39 -0800 (PST)
From: Doug White <dwhite@gumbysoft.com>
Reply-To: Doug White <dwhite@gumbysoft.com>
To: FreeBSD-gnats-submit@freebsd.org
Cc:
Subject: SYSV Semaphore exhaustion causes Giant deadlock
X-Send-Pr-Version: 3.113
X-GNATS-Notify:

>Number:         58984
>Category:       kern
>Synopsis:       SYSV Semaphore exhaustion causes Giant deadlock
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Wed Nov 05 14:40:08 PST 2003
>Closed-Date:    Sun Nov 09 23:26:31 PST 2003
>Last-Modified:  Sun Nov 09 23:26:31 PST 2003
>Originator:     Doug White
>Release:        FreeBSD 5.1-CURRENT i386
>Organization:
>Environment:
FreeBSD fbtest1.looksmart.com 5.1-CURRENT FreeBSD 5.1-CURRENT #6: Wed Nov  5 12:10:15 PST 2003     root@fbtest1.looksmart.com:/usr/src/sys/i386/compile/XEON  i386

	
>Description:
	Depleting the system of SYSV semaphores causes the semop() system
call to retry allocating resources indefinitely while holding Giant.  This
causes a complete system hang.  The tracebacks I get when I break into ddb
are usually in semundo_adjust().

>How-To-Repeat:
In our environment, I was able to duplicate this using two 2.4GHz Xeon systems,
one serving as an httpd/php host with the pear-APC module installed, and the
other as a load generator.  pear-APC uses SYSV semaphores to control access
to its shared code cache.  The client uses ApacheBench (ab) to hit a php script
on the server.  If ab is run with more than 15 clients, the system hangs less
than a second after the benchmark begins.
	
>Fix:
	


>Release-Note:
>Audit-Trail:

From: Tim Robbins <tjr@freebsd.org>
To: dwhite@gumbysoft.com
Cc: bug-followup@freebsd.org
Subject: Re: kern/58984: SYSV Semaphore exhaustion causes Giant deadlock
Date: Fri, 7 Nov 2003 13:48:33 +1100

 Try this patch:
 (http://perforce.freebsd.org/chv.cgi?CH=41609)
 
 --- sysv_sem.c.old	Fri Nov  7 13:46:17 2003
 +++ sysv_sem.c	Fri Nov  7 13:44:18 2003
 @@ -66,7 +66,7 @@
  static struct semid_ds *sema;	/* semaphore id pool */
  static struct mtx *sema_mtx;	/* semaphore id pool mutexes*/
  static struct sem *sem;		/* semaphore pool */
 -SLIST_HEAD(, sem_undo) semu_list;	/* list of active undo structures */
 +LIST_HEAD(, sem_undo) semu_list;	/* list of active undo structures */
  static int	*semu;		/* undo structure pool */
  static eventhandler_tag semexit_tag;
  
 @@ -86,7 +86,7 @@
   * Undo structure (one per process)
   */
  struct sem_undo {
 -	SLIST_ENTRY(sem_undo) un_next;	/* ptr to next active undo structure */
 +	LIST_ENTRY(sem_undo) un_list;	/* ptr to next active undo structure */
  	struct	proc *un_proc;		/* owner of this structure */
  	short	un_cnt;			/* # of active entries */
  	struct undo {
 @@ -205,7 +205,7 @@
  		struct sem_undo *suptr = SEMU(i);
  		suptr->un_proc = NULL;
  	}
 -	SLIST_INIT(&semu_list);
 +	LIST_INIT(&semu_list);
  	mtx_init(&sem_mtx, "sem", NULL, MTX_DEF);
  	semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
  	    EVENTHANDLER_PRI_ANY);
 @@ -303,8 +303,7 @@
  	struct thread *td;
  {
  	int i;
 -	struct sem_undo *suptr;
 -	struct sem_undo **supptr;
 +	struct sem_undo *suptr, *sutmpptr;
  	int attempt;
  
  	SEMUNDO_LOCKASSERT(MA_OWNED);
 @@ -323,7 +322,7 @@
  		for (i = 0; i < seminfo.semmnu; i++) {
  			suptr = SEMU(i);
  			if (suptr->un_proc == NULL) {
 -				SLIST_INSERT_HEAD(&semu_list, suptr, un_next);
 +				LIST_INSERT_HEAD(&semu_list, suptr, un_list);
  				suptr->un_cnt = 0;
  				suptr->un_proc = td->td_proc;
  				return(suptr);
 @@ -339,12 +338,12 @@
  			/* All the structures are in use - try to free some */
  			int did_something = 0;
  
 -			SLIST_FOREACH_PREVPTR(suptr, supptr, &semu_list,
 -			    un_next) {
 +			LIST_FOREACH_SAFE(suptr, &semu_list, un_list,
 +			    sutmpptr) {
  				if (suptr->un_cnt == 0) {
  					suptr->un_proc = NULL;
  					did_something = 1;
 -					*supptr = SLIST_NEXT(suptr, un_next);
 +					LIST_REMOVE(suptr, un_list);
  				}
  			}
  
 @@ -385,7 +384,7 @@
  
  	suptr = *supptr;
  	if (suptr == NULL) {
 -		SLIST_FOREACH(suptr, &semu_list, un_next) {
 +		LIST_FOREACH(suptr, &semu_list, un_list) {
  			if (suptr->un_proc == p) {
  				*supptr = suptr;
  				break;
 @@ -446,7 +445,7 @@
  	struct sem_undo *suptr;
  
  	SEMUNDO_LOCKASSERT(MA_OWNED);
 -	SLIST_FOREACH(suptr, &semu_list, un_next) {
 +	LIST_FOREACH(suptr, &semu_list, un_list) {
  		struct undo *sunptr = &suptr->un_ent[0];
  		int i = 0;
  
 @@ -1148,22 +1147,22 @@
  	void *arg;
  	struct proc *p;
  {
 -	struct sem_undo *suptr;
 -	struct sem_undo **supptr;
 +	struct sem_undo *suptr, *sutmpptr;
  
  	/*
  	 * Go through the chain of undo vectors looking for one
  	 * associated with this process.
  	 */
  	SEMUNDO_LOCK();
 -	SLIST_FOREACH_PREVPTR(suptr, supptr, &semu_list, un_next) {
 +	LIST_FOREACH_SAFE(suptr, &semu_list, un_list, sutmpptr) {
  		if (suptr->un_proc == p)
  			break;
  	}
 -	SEMUNDO_UNLOCK();
  
 -	if (suptr == NULL)
 +	if (suptr == NULL) {
 +		SEMUNDO_UNLOCK();
  		return;
 +	}
  
  	DPRINTF(("proc @%08x has undo structure with %d entries\n", p,
  	    suptr->un_cnt));
 @@ -1184,7 +1183,6 @@
  			semaptr = &sema[semid];
  			sema_mtxp = &sema_mtx[semid];
  			mtx_lock(sema_mtxp);
 -			SEMUNDO_LOCK();
  			if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
  				panic("semexit - semid not allocated");
  			if (semnum >= semaptr->sem_nsems)
 @@ -1209,7 +1207,6 @@
  			wakeup(semaptr);
  			DPRINTF(("semexit:  back from wakeup\n"));
  			mtx_unlock(sema_mtxp);
 -			SEMUNDO_UNLOCK();
  		}
  	}
  
 @@ -1218,7 +1215,8 @@
  	 */
  	DPRINTF(("removing vector\n"));
  	suptr->un_proc = NULL;
 -	*supptr = SLIST_NEXT(suptr, un_next);
 +	LIST_REMOVE(suptr, un_list);
 +	SEMUNDO_UNLOCK();
  }
  
  static int
State-Changed-From-To: open->closed 
State-Changed-By: tjr 
State-Changed-When: Sun Nov 9 23:25:43 PST 2003 
State-Changed-Why:  
Fixed in -current, does not affect -stable; thanks for the report. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=58984 
>Unformatted:
