From mm@mail.vx.sk  Tue May  4 10:42:52 2010
Return-Path: <mm@mail.vx.sk>
Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52])
	by hub.freebsd.org (Postfix) with ESMTP id 3A65C106566C;
	Tue,  4 May 2010 10:42:52 +0000 (UTC)
	(envelope-from mm@mail.vx.sk)
Received: from mail.vx.sk (core.vx.sk [188.40.32.143])
	by mx1.freebsd.org (Postfix) with ESMTP id 847788FC08;
	Tue,  4 May 2010 10:42:51 +0000 (UTC)
Received: from core.vx.sk (localhost [127.0.0.1])
	by mail.vx.sk (Postfix) with ESMTP id 616BE8F579;
	Tue,  4 May 2010 12:42:49 +0200 (CEST)
Received: from mail.vx.sk ([127.0.0.1])
	by core.vx.sk (mail.vx.sk [127.0.0.1]) (amavisd-new, port 10024)
	with LMTP id i-o+ksJnE8+v; Tue,  4 May 2010 12:42:47 +0200 (CEST)
Received: by mail.vx.sk (Postfix, from userid 1001)
	id 198038F567; Tue,  4 May 2010 12:42:47 +0200 (CEST)
Message-Id: <20100504104247.198038F567@mail.vx.sk>
Date: Tue,  4 May 2010 12:42:47 +0200 (CEST)
From: Martin Matuska <mm@FreeBSD.org>
Reply-To: Martin Matuska <mm@FreeBSD.org>
To: FreeBSD-gnats-submit@freebsd.org
Cc: pjd@FreeBSD.org
Subject: [zfs] [patch] fix deadlock during zfs receive (onnv 9299)
X-Send-Pr-Version: 3.113
X-GNATS-Notify:

>Number:         146296
>Category:       kern
>Synopsis:       [zfs] [patch] fix deadlock during zfs receive (onnv 9299)
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    mm
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Tue May 04 10:50:01 UTC 2010
>Closed-Date:    Tue May 11 07:28:44 UTC 2010
>Last-Modified:  Tue May 11 07:28:44 UTC 2010
>Originator:     Martin Matuska
>Release:        FreeBSD 8.0-STABLE amd64
>Organization:
>Environment:
>Description:
I have encountered a hang in zfs receive while receiving many incremental
streams.

This problem has been described on the OpenSolaris mailing lists;
it matches my symptoms and affects our ZFS port.

OpenSolaris Bug IDs:
	6783818 Incremental stream receive panics system
	6826836 Deadlock possible in dmu_object_reclaim()

Mailing list discussion:
http://mail.opensolaris.org/pipermail/storage-discuss/2009-June/006171.html

Fixed in onnv revision: 9299:8809e849f63e
>How-To-Repeat:
>Fix:
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	(revision 207608)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	(working copy)
@@ -128,15 +128,6 @@
 		return (0);
 	}
 
-	tx = dmu_tx_create(os);
-	dmu_tx_hold_bonus(tx, object);
-	err = dmu_tx_assign(tx, TXG_WAIT);
-	if (err) {
-		dmu_tx_abort(tx);
-		dnode_rele(dn, FTAG);
-		return (err);
-	}
-
 	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
 
 	/*
@@ -144,16 +135,27 @@
 	 * be a new file instance.   We must clear out the previous file
 	 * contents before we can change this type of metadata in the dnode.
 	 */
-	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize)
-		dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
+	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) {
+		err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
+		if (err)
+			goto out;
+	}
 
+	tx = dmu_tx_create(os);
+	dmu_tx_hold_bonus(tx, object);
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err) {
+		dmu_tx_abort(tx);
+		goto out;
+	}
+
 	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
 
 	dmu_tx_commit(tx);
-
+out:
 	dnode_rele(dn, FTAG);
 
-	return (0);
+	return (err);
 }
 
 int
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	(revision 207608)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	(working copy)
@@ -464,15 +464,15 @@
 	ASSERT(db->db_buf == NULL);
 
 	if (db->db_blkid == DB_BONUS_BLKID) {
-		int bonuslen = dn->dn_bonuslen;
+		int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
 
 		ASSERT3U(bonuslen, <=, db->db.db_size);
 		db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
 		arc_space_consume(DN_MAX_BONUSLEN);
 		if (bonuslen < DN_MAX_BONUSLEN)
 			bzero(db->db.db_data, DN_MAX_BONUSLEN);
-		bcopy(DN_BONUS(dn->dn_phys), db->db.db_data,
-		    bonuslen);
+		if (bonuslen)
+			bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
 		dbuf_update_data(db);
 		db->db_state = DB_CACHED;
 		mutex_exit(&db->db_mtx);
>Release-Note:
>Audit-Trail:
Responsible-Changed-From-To: freebsd-bugs->freebsd-fs 
Responsible-Changed-By: linimon 
Responsible-Changed-When: Tue May 4 15:58:45 UTC 2010 
Responsible-Changed-Why:  
Over to maintainer(s). 

http://www.freebsd.org/cgi/query-pr.cgi?pr=146296 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/146296: commit references a PR
Date: Tue,  4 May 2010 17:30:27 +0000 (UTC)

 Author: mm
 Date: Tue May  4 17:30:07 2010
 New Revision: 207624
 URL: http://svn.freebsd.org/changeset/base/207624
 
 Log:
   Fix deadlock during zfs receive.
   
   OpenSolaris onnv revision:	9299:8809e849f63e
   
   PR:		kern/146296
   Submitted by:	myself
   Approved by:	pjd, delphij (mentor)
   Obtained from:	OpenSolaris (Bug ID 6783818, 6826836)
   MFC after:	1 week
 
 Modified:
   head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
   head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
 
 Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
 ==============================================================================
 --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Tue May  4 17:12:36 2010	(r207623)
 +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Tue May  4 17:30:07 2010	(r207624)
 @@ -464,15 +464,15 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
  	ASSERT(db->db_buf == NULL);
  
  	if (db->db_blkid == DB_BONUS_BLKID) {
 -		int bonuslen = dn->dn_bonuslen;
 +		int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
  
  		ASSERT3U(bonuslen, <=, db->db.db_size);
  		db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
  		arc_space_consume(DN_MAX_BONUSLEN);
  		if (bonuslen < DN_MAX_BONUSLEN)
  			bzero(db->db.db_data, DN_MAX_BONUSLEN);
 -		bcopy(DN_BONUS(dn->dn_phys), db->db.db_data,
 -		    bonuslen);
 +		if (bonuslen)
 +			bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
  		dbuf_update_data(db);
  		db->db_state = DB_CACHED;
  		mutex_exit(&db->db_mtx);
 
 Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
 ==============================================================================
 --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Tue May  4 17:12:36 2010	(r207623)
 +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Tue May  4 17:30:07 2010	(r207624)
 @@ -128,15 +128,6 @@ dmu_object_reclaim(objset_t *os, uint64_
  		return (0);
  	}
  
 -	tx = dmu_tx_create(os);
 -	dmu_tx_hold_bonus(tx, object);
 -	err = dmu_tx_assign(tx, TXG_WAIT);
 -	if (err) {
 -		dmu_tx_abort(tx);
 -		dnode_rele(dn, FTAG);
 -		return (err);
 -	}
 -
  	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
  
  	/*
 @@ -144,16 +135,27 @@ dmu_object_reclaim(objset_t *os, uint64_
  	 * be a new file instance.   We must clear out the previous file
  	 * contents before we can change this type of metadata in the dnode.
  	 */
 -	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize)
 -		dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
 +	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) {
 +		err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
 +		if (err)
 +			goto out;
 +	}
 +
 +	tx = dmu_tx_create(os);
 +	dmu_tx_hold_bonus(tx, object);
 +	err = dmu_tx_assign(tx, TXG_WAIT);
 +	if (err) {
 +		dmu_tx_abort(tx);
 +		goto out;
 +	}
  
  	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
  
  	dmu_tx_commit(tx);
 -
 +out:
  	dnode_rele(dn, FTAG);
  
 -	return (0);
 +	return (err);
  }
  
  int
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
State-Changed-From-To: open->patched 
State-Changed-By: mm 
State-Changed-When: Tue May 4 21:08:22 UTC 2010 
State-Changed-Why:  
Scheduled for MFC 

http://www.freebsd.org/cgi/query-pr.cgi?pr=146296 
Responsible-Changed-From-To: freebsd-fs->mm 
Responsible-Changed-By: mm 
Responsible-Changed-When: Tue May 4 21:09:08 UTC 2010 
Responsible-Changed-Why:  
Take PR. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=146296 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/146296: commit references a PR
Date: Tue, 11 May 2010 07:02:39 +0000 (UTC)

 Author: mm
 Date: Tue May 11 07:02:29 2010
 New Revision: 207906
 URL: http://svn.freebsd.org/changeset/base/207906
 
 Log:
   MFC r207624:
   
   Fix deadlock during zfs receive.
   
   OpenSolaris onnv revision:	9299:8809e849f63e
   
   PR:		kern/146296
   Approved by:	pjd, delphij (mentor)
   Obtained from:	OpenSolaris (Bug ID 6783818, 6826836)
 
 Modified:
   stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
   stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
 Directory Properties:
   stable/8/sys/   (props changed)
   stable/8/sys/amd64/include/xen/   (props changed)
   stable/8/sys/cddl/contrib/opensolaris/   (props changed)
   stable/8/sys/contrib/dev/acpica/   (props changed)
   stable/8/sys/contrib/pf/   (props changed)
   stable/8/sys/dev/xen/xenpci/   (props changed)
   stable/8/sys/geom/sched/   (props changed)
 
 Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
 ==============================================================================
 --- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Tue May 11 01:29:18 2010	(r207905)
 +++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Tue May 11 07:02:29 2010	(r207906)
 @@ -464,15 +464,15 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
  	ASSERT(db->db_buf == NULL);
  
  	if (db->db_blkid == DB_BONUS_BLKID) {
 -		int bonuslen = dn->dn_bonuslen;
 +		int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
  
  		ASSERT3U(bonuslen, <=, db->db.db_size);
  		db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
  		arc_space_consume(DN_MAX_BONUSLEN);
  		if (bonuslen < DN_MAX_BONUSLEN)
  			bzero(db->db.db_data, DN_MAX_BONUSLEN);
 -		bcopy(DN_BONUS(dn->dn_phys), db->db.db_data,
 -		    bonuslen);
 +		if (bonuslen)
 +			bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
  		dbuf_update_data(db);
  		db->db_state = DB_CACHED;
  		mutex_exit(&db->db_mtx);
 
 Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
 ==============================================================================
 --- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Tue May 11 01:29:18 2010	(r207905)
 +++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Tue May 11 07:02:29 2010	(r207906)
 @@ -128,15 +128,6 @@ dmu_object_reclaim(objset_t *os, uint64_
  		return (0);
  	}
  
 -	tx = dmu_tx_create(os);
 -	dmu_tx_hold_bonus(tx, object);
 -	err = dmu_tx_assign(tx, TXG_WAIT);
 -	if (err) {
 -		dmu_tx_abort(tx);
 -		dnode_rele(dn, FTAG);
 -		return (err);
 -	}
 -
  	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
  
  	/*
 @@ -144,16 +135,27 @@ dmu_object_reclaim(objset_t *os, uint64_
  	 * be a new file instance.   We must clear out the previous file
  	 * contents before we can change this type of metadata in the dnode.
  	 */
 -	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize)
 -		dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
 +	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) {
 +		err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
 +		if (err)
 +			goto out;
 +	}
 +
 +	tx = dmu_tx_create(os);
 +	dmu_tx_hold_bonus(tx, object);
 +	err = dmu_tx_assign(tx, TXG_WAIT);
 +	if (err) {
 +		dmu_tx_abort(tx);
 +		goto out;
 +	}
  
  	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
  
  	dmu_tx_commit(tx);
 -
 +out:
  	dnode_rele(dn, FTAG);
  
 -	return (0);
 +	return (err);
  }
  
  int
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/146296: commit references a PR
Date: Tue, 11 May 2010 07:08:03 +0000 (UTC)

 Author: mm
 Date: Tue May 11 07:07:44 2010
 New Revision: 207907
 URL: http://svn.freebsd.org/changeset/base/207907
 
 Log:
   MFC r207624:
   
   Fix deadlock during zfs receive.
   
   OpenSolaris onnv revision:	9299:8809e849f63e
   
   PR:		kern/146296
   Approved by:	pjd, delphij (mentor)
   Obtained from:	OpenSolaris (Bug ID 6783818, 6826836)
 
 Modified:
   stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
   stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
 Directory Properties:
   stable/7/sys/   (props changed)
   stable/7/sys/cddl/contrib/opensolaris/   (props changed)
   stable/7/sys/contrib/dev/acpica/   (props changed)
   stable/7/sys/contrib/pf/   (props changed)
 
 Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
 ==============================================================================
 --- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Tue May 11 07:02:29 2010	(r207906)
 +++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Tue May 11 07:07:44 2010	(r207907)
 @@ -464,15 +464,15 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
  	ASSERT(db->db_buf == NULL);
  
  	if (db->db_blkid == DB_BONUS_BLKID) {
 -		int bonuslen = dn->dn_bonuslen;
 +		int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
  
  		ASSERT3U(bonuslen, <=, db->db.db_size);
  		db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
  		arc_space_consume(DN_MAX_BONUSLEN);
  		if (bonuslen < DN_MAX_BONUSLEN)
  			bzero(db->db.db_data, DN_MAX_BONUSLEN);
 -		bcopy(DN_BONUS(dn->dn_phys), db->db.db_data,
 -		    bonuslen);
 +		if (bonuslen)
 +			bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
  		dbuf_update_data(db);
  		db->db_state = DB_CACHED;
  		mutex_exit(&db->db_mtx);
 
 Modified: stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
 ==============================================================================
 --- stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Tue May 11 07:02:29 2010	(r207906)
 +++ stable/7/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Tue May 11 07:07:44 2010	(r207907)
 @@ -128,15 +128,6 @@ dmu_object_reclaim(objset_t *os, uint64_
  		return (0);
  	}
  
 -	tx = dmu_tx_create(os);
 -	dmu_tx_hold_bonus(tx, object);
 -	err = dmu_tx_assign(tx, TXG_WAIT);
 -	if (err) {
 -		dmu_tx_abort(tx);
 -		dnode_rele(dn, FTAG);
 -		return (err);
 -	}
 -
  	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
  
  	/*
 @@ -144,16 +135,27 @@ dmu_object_reclaim(objset_t *os, uint64_
  	 * be a new file instance.   We must clear out the previous file
  	 * contents before we can change this type of metadata in the dnode.
  	 */
 -	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize)
 -		dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
 +	if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) {
 +		err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
 +		if (err)
 +			goto out;
 +	}
 +
 +	tx = dmu_tx_create(os);
 +	dmu_tx_hold_bonus(tx, object);
 +	err = dmu_tx_assign(tx, TXG_WAIT);
 +	if (err) {
 +		dmu_tx_abort(tx);
 +		goto out;
 +	}
  
  	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
  
  	dmu_tx_commit(tx);
 -
 +out:
  	dnode_rele(dn, FTAG);
  
 -	return (0);
 +	return (err);
  }
  
  int
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
State-Changed-From-To: patched->closed 
State-Changed-By: mm 
State-Changed-When: Tue May 11 07:28:43 UTC 2010 
State-Changed-Why:  
Committed. Thanks! 

http://www.freebsd.org/cgi/query-pr.cgi?pr=146296 
>Unformatted:
