From nobody@FreeBSD.org  Wed Jul 27 15:04:54 2011
Return-Path: <nobody@FreeBSD.org>
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id F0A9F106564A
	for <freebsd-gnats-submit@FreeBSD.org>; Wed, 27 Jul 2011 15:04:54 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from red.freebsd.org (red.freebsd.org [IPv6:2001:4f8:fff6::22])
	by mx1.freebsd.org (Postfix) with ESMTP id DFC108FC19
	for <freebsd-gnats-submit@FreeBSD.org>; Wed, 27 Jul 2011 15:04:54 +0000 (UTC)
Received: from red.freebsd.org (localhost [127.0.0.1])
	by red.freebsd.org (8.14.4/8.14.4) with ESMTP id p6RF4sQA083508
	for <freebsd-gnats-submit@FreeBSD.org>; Wed, 27 Jul 2011 15:04:54 GMT
	(envelope-from nobody@red.freebsd.org)
Received: (from nobody@localhost)
	by red.freebsd.org (8.14.4/8.14.4/Submit) id p6RF4sf9083507;
	Wed, 27 Jul 2011 15:04:54 GMT
	(envelope-from nobody)
Message-Id: <201107271504.p6RF4sf9083507@red.freebsd.org>
Date: Wed, 27 Jul 2011 15:04:54 GMT
From: Pedro Giffuni <giffunip@tutopia.com>
To: freebsd-gnats-submit@FreeBSD.org
Subject: fs/ext2fs: finish reallocblk implementation
X-Send-Pr-Version: www-3.1
X-GNATS-Notify:

>Number:         159233
>Category:       kern
>Synopsis:       [ext2fs] [patch] fs/ext2fs: finish reallocblk implementation
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    pfg
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Wed Jul 27 15:10:11 UTC 2011
>Closed-Date:    Thu Jan 05 01:53:02 UTC 2012
>Last-Modified:  Thu Jan 05 01:53:02 UTC 2012
>Originator:     Pedro Giffuni
>Release:        9.0-CURRENT
>Organization:
>Environment:
[pedro@mogwai] /sys/fs# uname -a  
FreeBSD mogwai.giffuni.net 9.0-CURRENT FreeBSD 9.0-CURRENT #6: Sat Apr 30 01:37:57 PDT 2011     root@build9x64.pcbsd.org:/usr/obj/pcbsd-build90/fbsd-source/9.0/sys/PCBSD  amd64

>Description:
Block reallocation is a feature planned but not finished by the initial ext2fs implementation in LITES:
http://www.cs.utah.edu/flux/lites/html/ext2fs.html

The feature has been standard for a while in UFS as a means to reduce fragmentation, therefore maintaining consistent performance with filesystem aging. The BSD-licensed ext2fs is similar enough to UFS to make porting this feature possible.

This is also very similar to what ext4 calls "delayed allocation".

In his 2010 GSoC, Zheng Liu did the port and found it to produce better performance improvements than the preallocation approach.

All the credit for the port goes to Zheng Liu: I only did some style/English cleanups and helped get it ready to apply cleanly to 9.0-CURRENT.
>How-To-Repeat:

>Fix:


Patch attached with submission follows:

diff -ru ext2fs.orig/ext2_alloc.c ext2fs/ext2_alloc.c
--- ext2fs.orig/ext2_alloc.c	2011-04-29 10:15:45.000000000 +0000
+++ ext2fs/ext2_alloc.c	2011-05-09 10:37:23.000000000 +0000
@@ -42,6 +42,7 @@
 #include <sys/vnode.h>
 #include <sys/stat.h>
 #include <sys/mount.h>
+#include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/buf.h>
 
@@ -52,6 +53,7 @@
 #include <fs/ext2fs/ext2_extern.h>
 
 static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
+static daddr_t	ext2_clusteralloc(struct inode *, int, daddr_t, int);
 static u_long	ext2_dirpref(struct inode *);
 static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
 static u_long	ext2_hashalloc(struct inode *, int, long, int,
@@ -59,9 +61,6 @@
 						int));
 static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
 static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
-#ifdef FANCY_REALLOC
-static int	ext2_reallocblks(struct vop_reallocblks_args *);
-#endif
 
 /*
  * Allocate a block in the file system.
@@ -113,20 +112,20 @@
 	if (bpref >= fs->e2fs->e2fs_bcount)
 		bpref = 0;
 	if (bpref == 0)
-                cg = ino_to_cg(fs, ip->i_number);
-        else
-                cg = dtog(fs, bpref);
-        bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
-                                                 ext2_alloccg);
-        if (bno > 0) {
+		cg = ino_to_cg(fs, ip->i_number);
+	else
+		cg = dtog(fs, bpref);
+	bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
+				      ext2_alloccg);
+	if (bno > 0) {
 		/* set next_alloc fields as done in block_getblk */
 		ip->i_next_alloc_block = lbn;
 		ip->i_next_alloc_goal = bno;
 
-                ip->i_blocks += btodb(fs->e2fs_bsize);
-                ip->i_flag |= IN_CHANGE | IN_UPDATE;
-                *bnp = bno;
-                return (0);
+		ip->i_blocks += btodb(fs->e2fs_bsize);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		*bnp = bno;
+		return (0);
         }
 nospace:
 	EXT2_UNLOCK(ump);
@@ -150,7 +149,6 @@
  * the previous block allocation will be used.
  */
 
-#ifdef FANCY_REALLOC
 SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
 
 static int doasyncfree = 1;
@@ -159,7 +157,6 @@
 
 static int doreallocblks = 1;
 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
-#endif
 
 int
 ext2_reallocblks(ap)
@@ -168,11 +165,6 @@
 		struct cluster_save *a_buflist;
 	} */ *ap;
 {
-#ifndef FANCY_REALLOC
-/* printf("ext2_reallocblks not implemented\n"); */
-return ENOSPC;
-#else
-
 	struct m_ext2fs *fs;
 	struct inode *ip;
 	struct vnode *vp;
@@ -184,14 +176,17 @@
 	int32_t start_lbn, end_lbn, soff, newblk, blkno;
 	int i, len, start_lvl, end_lvl, pref, ssize;
 
+	if (doreallocblks == 0)
+		  return (ENOSPC);
+
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	fs = ip->i_e2fs;
 	ump = ip->i_ump;
-#ifdef UNKLAR
-	if (fs->fs_contigsumsize <= 0)
+
+	if (fs->e2fs_contigsumsize <= 0)
 		return (ENOSPC);
-#endif
+
 	buflist = ap->a_buflist;
 	len = buflist->bs_nchildren;
 	start_lbn = buflist->bs_children[0]->b_lblkno;
@@ -228,11 +223,6 @@
 		soff = idp->in_off;
 	}
 	/*
-	 * Find the preferred location for the cluster.
-	 */
-	EXT2_LOCK(ump);
-	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
-	/*
 	 * If the block range spans two block maps, get the second map.
 	 */
 	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
@@ -243,13 +233,16 @@
 			panic("ext2_reallocblk: start == end");
 #endif
 		ssize = len - (idp->in_off + 1);
-		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
-			EXT2_UNLOCK(ump);	
+		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
 			goto fail;
-		}
 		ebap = (int32_t *)ebp->b_data;
 	}
 	/*
+	 * Find the preferred location for the cluster.
+	 */
+	EXT2_LOCK(ump);
+	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
+	/*
 	 * Search the block map looking for an allocation of the desired size.
 	 */
 	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
@@ -264,15 +257,23 @@
 	 * block pointers in the inode and indirect blocks associated
 	 * with the file.
 	 */
+#ifdef DEBUG
+	printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
+	    (intmax_t)start_lbn, (intmax_t)end_lbn);
+#endif /* DEBUG */
 	blkno = newblk;
 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
-		if (i == ssize)
+		if (i == ssize) {
 			bap = ebap;
 			soff = -i;
+		}
 #ifdef DIAGNOSTIC
 		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
 			panic("ext2_reallocblks: alloc mismatch");
 #endif
+#ifdef DEBUG
+	printf(" %d,", *bap);
+#endif /* DEBUG */
 		*bap++ = blkno;
 	}
 	/*
@@ -308,11 +309,20 @@
 	/*
 	 * Last, free the old blocks and assign the new blocks to the buffers.
 	 */
+#ifdef DEBUG
+	printf("\n\tnew:");
+#endif /* DEBUG */
 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
 		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
 		    fs->e2fs_bsize);
 		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
-	}
+#ifdef DEBUG
+		printf(" %d,", blkno);
+#endif /* DEBUG */
+	}
+#ifdef DEBUG
+	printf("\n");
+#endif /* DEBUG */
 	return (0);
 
 fail:
@@ -321,8 +331,6 @@
 	if (sbap != &ip->i_db[0])
 		brelse(sbp);
 	return (ENOSPC);
-
-#endif /* FANCY_REALLOC */
 }
 
 /*
@@ -747,6 +755,7 @@
 #endif
 	setbit(bbp, bno);
 	EXT2_LOCK(ump);
+	ext2_clusteracct(fs, bbp, cg, bno, -1);
 	fs->e2fs->e2fs_fbcount--;
 	fs->e2fs_gd[cg].ext2bgd_nbfree--;
 	fs->e2fs_fmod = 1;
@@ -756,6 +765,116 @@
 }
 
 /*
+ * Determine whether a cluster can be allocated.
+ */
+static daddr_t
+ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
+{
+	struct m_ext2fs *fs;
+	struct ext2mount *ump;
+	struct buf *bp;
+	char *bbp;
+	int bit, error, got, i, loc, run;
+	int32_t *lp;
+	daddr_t bno;
+
+	fs = ip->i_e2fs;
+	ump = ip->i_ump;
+
+	if (fs->e2fs_maxcluster[cg] < len)
+		return (0);
+
+	EXT2_UNLOCK(ump);
+	error = bread(ip->i_devvp,
+	    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+	    (int)fs->e2fs_bsize, NOCRED, &bp);
+	if (error)
+		goto fail_lock;
+
+	bbp = (char *)bp->b_data;
+	bp->b_xflags |= BX_BKGRDWRITE;
+
+	EXT2_LOCK(ump);
+	/*
+	 * Check to see if a cluster of the needed size (or bigger) is
+	 * available in this cylinder group.
+	 */
+	lp = &fs->e2fs_clustersum[cg].cs_sum[len];
+	for (i = len; i <= fs->e2fs_contigsumsize; i++)
+		if (*lp++ > 0)
+			break;
+	if (i > fs->e2fs_contigsumsize) {
+		/*
+		 * Update the cluster summary information to reflect
+		 * the true maximum sized cluster so that future cluster
+		 * allocation requests can avoid reading the bitmap only
+		 * to find no cluster.
+		 */
+		lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
+			for (i = len - 1; i > 0; i--)
+				if (*lp-- > 0)
+					break;
+		fs->e2fs_maxcluster[cg] = i;
+		goto fail;
+	}
+	EXT2_UNLOCK(ump);
+
+	/* Search the bitmap to find a big enough cluster like in FFS. */
+	if (dtog(fs, bpref) != cg)
+		bpref = 0;
+	if (bpref != 0)
+		bpref = dtogd(fs, bpref);
+	loc = bpref / NBBY;
+	bit = 1 << (bpref % NBBY);
+	for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) {
+		if ((bbp[loc] & bit) != 0)
+			run = 0;
+		else {
+			run++;
+			if (run == len)
+				break;
+		}
+		if ((got & (NBBY - 1)) != (NBBY - 1))
+			bit <<= 1;
+		else {
+			loc++;
+			bit = 1;
+		}
+	}
+
+	if (got >= fs->e2fs->e2fs_fpg)
+		goto fail_lock;
+
+	/* Allocate the cluster that we found. */
+	for (i = 1; i < len; i++)
+		if (!isclr(bbp, got - run + i))
+			panic("ext2_clusteralloc: map mismatch");
+
+	bno = got - run + 1;
+	if (bno >= fs->e2fs->e2fs_fpg)
+		panic("ext2_clusteralloc: allocated out of group");
+
+	EXT2_LOCK(ump);
+	for (i = 0; i < len; i += fs->e2fs_fpb) {
+		setbit(bbp, bno + i);
+		ext2_clusteracct(fs, bbp, cg, bno + i, -1);
+		fs->e2fs->e2fs_fbcount--;
+		fs->e2fs_gd[cg].ext2bgd_nbfree--;
+	}
+	fs->e2fs_fmod = 1;
+	EXT2_UNLOCK(ump);
+
+	bdwrite(bp);
+	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
+
+fail_lock:
+	EXT2_LOCK(ump);
+fail:
+	brelse(bp);
+	return (0);
+}
+
+/*
  * Determine whether an inode can be allocated.
  *
  * Check to see if an inode is available, and if it is,
@@ -877,6 +996,7 @@
         }
         clrbit(bbp, bno);
 	EXT2_LOCK(ump);
+	ext2_clusteracct(fs, bbp, cg, bno, 1);
         fs->e2fs->e2fs_fbcount++;
         fs->e2fs_gd[cg].ext2bgd_nbfree++;
         fs->e2fs_fmod = 1;
diff -ru ext2fs.orig/ext2_extern.h ext2fs/ext2_extern.h
--- ext2fs.orig/ext2_extern.h	2011-04-29 10:15:45.000000000 +0000
+++ ext2fs/ext2_extern.h	2011-05-09 10:35:57.000000000 +0000
@@ -55,12 +55,13 @@
 int32_t	ext2_blkpref(struct inode *, int32_t, int, int32_t *, int32_t);
 int	ext2_bmap(struct vop_bmap_args *);
 int	ext2_bmaparray(struct vnode *, int32_t, int32_t *, int *, int *);
+void	ext2_clusteracct(struct m_ext2fs *, char *, int, daddr_t, int);
 void	ext2_dirbad(struct inode *ip, doff_t offset, char *how);
 void	ext2_ei2i(struct ext2fs_dinode *, struct inode *);
 int	ext2_getlbns(struct vnode *, int32_t, struct indir *, int *);
 void	ext2_i2ei(struct inode *, struct ext2fs_dinode *);
+int     ext2_reallocblks(struct vop_reallocblks_args *);
 void	ext2_itimes(struct vnode *vp);
-int	ext2_reallocblks(struct vop_reallocblks_args *);
 int	ext2_reclaim(struct vop_reclaim_args *);
 void	ext2_setblock(struct m_ext2fs *, u_char *, int32_t);
 int	ext2_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
diff -ru ext2fs.orig/ext2_subr.c ext2fs/ext2_subr.c
--- ext2fs.orig/ext2_subr.c	2011-04-29 10:15:45.000000000 +0000
+++ ext2fs/ext2_subr.c	2011-05-08 14:21:26.000000000 +0000
@@ -120,3 +120,107 @@
 	}
 }
 #endif /* KDB */
+
+/*
+ * Update the cluster map because of an allocation or free, like FFS.
+ *
+ * Cnt == 1 means free; cnt == -1 means allocating.
+ */
+void
+ext2_clusteracct(struct m_ext2fs *fs, char *bbp, int cg, daddr_t bno, int cnt)
+{
+	int32_t *sump = fs->e2fs_clustersum[cg].cs_sum;
+	int32_t *lp;
+	int back, bit, end, forw, i, loc, start;
+
+	/* Initialize the cluster summary array. */
+	if (fs->e2fs_clustersum[cg].cs_init == 0) {
+		int run = 0;
+		bit = 1;
+		loc = 0;
+
+		for (i = 0; i < fs->e2fs->e2fs_fpg; i++) {
+			if ((bbp[loc] & bit) == 0)
+				run++;
+			else if (run != 0) {
+				if (run > fs->e2fs_contigsumsize)
+					run = fs->e2fs_contigsumsize;
+				sump[run]++;
+				run = 0;
+			}
+			if ((i & (NBBY - 1)) != (NBBY - 1))
+				bit <<= 1;
+			else {
+				loc++;
+				bit = 1;
+			}
+		}
+		if (run != 0) {
+			if (run > fs->e2fs_contigsumsize)
+				run = fs->e2fs_contigsumsize;
+			sump[run]++;
+		}
+		fs->e2fs_clustersum[cg].cs_init = 1;
+	}
+
+	if (fs->e2fs_contigsumsize <= 0)
+		return;
+
+	/* Find the size of the cluster going forward. */
+	start = bno + 1;
+	end = start + fs->e2fs_contigsumsize;
+	if (end > fs->e2fs->e2fs_fpg)
+		end = fs->e2fs->e2fs_fpg;
+	loc = start / NBBY;
+	bit = 1 << (start % NBBY);
+	for (i = start; i < end; i++) {
+		if ((bbp[loc] & bit) != 0)
+			break;
+		if ((i & (NBBY - 1)) != (NBBY - 1))
+			bit <<= 1;
+		else {
+			loc++;
+			bit = 1;
+		}
+	}
+	forw = i - start;
+
+	/* Find the size of the cluster going backward. */
+	start = bno - 1;
+	end = start - fs->e2fs_contigsumsize;
+	if (end < 0)
+		end = -1;
+	loc = start / NBBY;
+	bit = 1 << (start % NBBY);
+	for (i = start; i > end; i--) {
+		if ((bbp[loc] & bit) != 0)
+			break;
+		if ((i & (NBBY - 1)) != 0)
+			bit >>= 1;
+		else {
+			loc--;
+			bit = 1 << (NBBY - 1);
+		}
+	}
+	back = start - i;
+
+	/*
+	 * Account for old cluster and the possibly new forward and
+	 * back clusters.
+	 */
+	i = back + forw + 1;
+	if (i > fs->e2fs_contigsumsize)
+		i = fs->e2fs_contigsumsize;
+	sump[i] += cnt;
+	if (back > 0)
+		sump[back] -= cnt;
+	if (forw > 0)
+		sump[forw] -= cnt;
+
+	/* Update cluster summary information. */
+	lp = &sump[fs->e2fs_contigsumsize];
+	for (i = fs->e2fs_contigsumsize; i > 0; i--)
+		if (*lp-- > 0)
+			break;
+	fs->e2fs_maxcluster[cg] = i;
+}
diff -ru ext2fs.orig/ext2_vfsops.c ext2fs/ext2_vfsops.c
--- ext2fs.orig/ext2_vfsops.c	2011-04-29 10:15:45.000000000 +0000
+++ ext2fs/ext2_vfsops.c	2011-05-08 14:21:26.000000000 +0000
@@ -405,7 +405,7 @@
  * Things to do to update the mount:
  *	1) invalidate all cached meta-data.
  *	2) re-read superblock from disk.
- *	3) re-read summary information from disk.
+ *	3) invalidate all cluster summary information.
  *	4) invalidate all inactive vnodes.
  *	5) invalidate all cached file data.
  *	6) re-read inode data for all active vnodes.
@@ -419,7 +419,9 @@
 	struct buf *bp;
 	struct ext2fs *es;
 	struct m_ext2fs *fs;
-	int error;
+	struct csum *sump;
+	int error, i;
+	int32_t *lp;
 
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		return (EINVAL);
@@ -456,6 +458,19 @@
 #endif
 	brelse(bp);
 
+	/*
+	 * Step 3: invalidate all cluster summary information.
+	 */
+	if (fs->e2fs_contigsumsize > 0) {
+		lp = fs->e2fs_maxcluster;
+		sump = fs->e2fs_clustersum;
+		for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
+			*lp++ = fs->e2fs_contigsumsize;
+			sump->cs_init = 0;
+			bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
+		}
+	}
+
 loop:
 	MNT_ILOCK(mp);
 	MNT_VNODE_FOREACH(vp, mp, mvp) {
@@ -511,8 +526,11 @@
 	struct cdev *dev = devvp->v_rdev;
 	struct g_consumer *cp;
 	struct bufobj *bo;
+	struct csum *sump;
 	int error;
 	int ronly;
+	int i, size;
+	int32_t *lp;
 
 	ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
 	/* XXX: use VOP_ACESS to check FS perms */
@@ -582,6 +600,32 @@
 	if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
 		goto out;
 
+	/*
+	 * We calculate the max contiguous blks and size of cluster summary
+	 * array. In FFS this is done by newfs. But the superblock in ext2fs
+	 * doesn't have these variables, so we have to calculate them here.
+	 */
+	ump->um_e2fs->e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize);
+	if (ump->um_e2fs->e2fs_maxcontig > 0)
+		ump->um_e2fs->e2fs_contigsumsize =
+		    MIN(ump->um_e2fs->e2fs_maxcontig, EXT2_MAXCONTIG);
+	else
+		ump->um_e2fs->e2fs_contigsumsize = 0;
+	if (ump->um_e2fs->e2fs_contigsumsize > 0) {
+		size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
+		ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
+		size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
+		ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
+		lp = ump->um_e2fs->e2fs_maxcluster;
+		sump = ump->um_e2fs->e2fs_clustersum;
+		for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
+			*lp++ = ump->um_e2fs->e2fs_contigsumsize;
+			sump->cs_init = 0;
+			sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
+			    sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
+		}
+	}
+
 	brelse(bp);
 	bp = NULL;
 	fs = ump->um_e2fs;
@@ -656,7 +700,8 @@
 {
 	struct ext2mount *ump;
 	struct m_ext2fs *fs;
-	int error, flags, ronly;
+	struct csum *sump;
+	int error, flags, i, ronly;
 
 	flags = 0;
 	if (mntflags & MNT_FORCE) {
@@ -681,6 +726,11 @@
 	g_topology_unlock();
 	PICKUP_GIANT();
 	vrele(ump->um_devvp);
+	sump = fs->e2fs_clustersum;
+	for (i = 0; i < fs->e2fs_gcount; i++, sump++)
+		free(sump->cs_sum, M_EXT2MNT);
+	free(fs->e2fs_clustersum, M_EXT2MNT);
+	free(fs->e2fs_maxcluster, M_EXT2MNT);
 	free(fs->e2fs_gd, M_EXT2MNT);
 	free(fs->e2fs_contigdirs, M_EXT2MNT);
 	free(fs->e2fs, M_EXT2MNT);
diff -ru ext2fs.orig/ext2fs.h ext2fs/ext2fs.h
--- ext2fs.orig/ext2fs.h	2011-04-29 10:15:45.000000000 +0000
+++ ext2fs/ext2fs.h	2011-05-08 14:29:05.000000000 +0000
@@ -45,6 +45,16 @@
 #define EXT2_LINK_MAX		32000
 
 /*
+ * A summary of contiguous blocks of various sizes is maintained
+ * in each cylinder group. Normally this is set by the initial
+ * value of fs_maxcontig.
+ *
+ * XXX:FS_MAXCONTIG is set to 16 to conserve space. Here we set
+ * EXT2_MAXCONTIG to 32 for better performance.
+ */
+#define EXT2_MAXCONTIG		32
+
+/*
  * Constants relative to the data blocks
  */
 #define	EXT2_NDIR_BLOCKS		12
@@ -151,6 +161,10 @@
 	char     e2fs_wasvalid;   /* valid at mount time */
 	off_t    e2fs_maxfilesize;
 	struct   ext2_gd *e2fs_gd; /* Group Descriptors */
+	int32_t  e2fs_maxcontig;        /* max number of contiguous blks */
+	int32_t  e2fs_contigsumsize;    /* size of cluster summary array */
+	int32_t *e2fs_maxcluster;       /* max cluster in each cyl group */
+	struct   csum *e2fs_clustersum; /* cluster summary in each cyl group */
 };
 
 /*
@@ -253,6 +267,13 @@
 	uint32_t reserved2[3];
 };
 
+/* cluster summary information */
+
+struct csum {
+	int8_t   cs_init; /* cluster summary has been initialized */
+	int32_t *cs_sum;  /* cluster summary array */
+};
+
 /* EXT2FS metadatas are stored in little-endian byte order. These macros
  * helps reading these metadatas
  */


>Release-Note:
>Audit-Trail:
Responsible-Changed-From-To: freebsd-bugs->freebsd-fs 
Responsible-Changed-By: linimon 
Responsible-Changed-When: Wed Jul 27 23:50:21 UTC 2011 
Responsible-Changed-Why:  
Over to maintainer(s). 

http://www.freebsd.org/cgi/query-pr.cgi?pr=159233 
Responsible-Changed-From-To: freebsd-fs->pfg 
Responsible-Changed-By: pfg 
Responsible-Changed-When: Thu Dec 15 20:22:45 UTC 2011 
Responsible-Changed-Why:  
Assign to myself. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=159233 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/159233: commit references a PR
Date: Thu, 15 Dec 2011 20:31:28 +0000 (UTC)

 Author: pfg
 Date: Thu Dec 15 20:31:18 2011
 New Revision: 228539
 URL: http://svn.freebsd.org/changeset/base/228539
 
 Log:
   Bring in reallocblk to ext2fs.
   
   The feature has been standard for a while in UFS as a means to reduce
   fragmentation, therefore maintaining consistent performance with
   filesystem aging. This is also very similar to what ext4 calls
   "delayed allocation".
   
   In his 2010 GSoC, Zheng Liu ported and benchmarked the missing
   FANCY_REALLOC code to find more consistent performance improvements than
   with the preallocation approach.
   
   PR:		159233
   Author:		Zheng Liu <gnehzuil AT SPAMFREE gmail DOT com>
   Sponsored by:	Google Inc.
   Approved by:	jhb (mentor)
   MFC after:	2 weeks
 
 Modified:
   head/sys/fs/ext2fs/ext2_alloc.c
   head/sys/fs/ext2fs/ext2_extern.h
   head/sys/fs/ext2fs/ext2_subr.c
   head/sys/fs/ext2fs/ext2_vfsops.c
   head/sys/fs/ext2fs/ext2fs.h
 
 Modified: head/sys/fs/ext2fs/ext2_alloc.c
 ==============================================================================
 --- head/sys/fs/ext2fs/ext2_alloc.c	Thu Dec 15 20:27:36 2011	(r228538)
 +++ head/sys/fs/ext2fs/ext2_alloc.c	Thu Dec 15 20:31:18 2011	(r228539)
 @@ -42,6 +42,7 @@
  #include <sys/vnode.h>
  #include <sys/stat.h>
  #include <sys/mount.h>
 +#include <sys/sysctl.h>
  #include <sys/syslog.h>
  #include <sys/buf.h>
  
 @@ -52,6 +53,7 @@
  #include <fs/ext2fs/ext2_extern.h>
  
  static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
 +static daddr_t	ext2_clusteralloc(struct inode *, int, daddr_t, int);
  static u_long	ext2_dirpref(struct inode *);
  static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
  static u_long	ext2_hashalloc(struct inode *, int, long, int,
 @@ -59,9 +61,6 @@ static u_long	ext2_hashalloc(struct inod
  						int));
  static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
  static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
 -#ifdef FANCY_REALLOC
 -static int	ext2_reallocblks(struct vop_reallocblks_args *);
 -#endif
  
  /*
   * Allocate a block in the file system.
 @@ -113,20 +112,20 @@ ext2_alloc(ip, lbn, bpref, size, cred, b
  	if (bpref >= fs->e2fs->e2fs_bcount)
  		bpref = 0;
  	if (bpref == 0)
 -                cg = ino_to_cg(fs, ip->i_number);
 -        else
 -                cg = dtog(fs, bpref);
 -        bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
 -                                                 ext2_alloccg);
 -        if (bno > 0) {
 +		cg = ino_to_cg(fs, ip->i_number);
 +	else
 +		cg = dtog(fs, bpref);
 +	bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
 +				      ext2_alloccg);
 +	if (bno > 0) {
  		/* set next_alloc fields as done in block_getblk */
  		ip->i_next_alloc_block = lbn;
  		ip->i_next_alloc_goal = bno;
  
 -                ip->i_blocks += btodb(fs->e2fs_bsize);
 -                ip->i_flag |= IN_CHANGE | IN_UPDATE;
 -                *bnp = bno;
 -                return (0);
 +		ip->i_blocks += btodb(fs->e2fs_bsize);
 +		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 +		*bnp = bno;
 +		return (0);
          }
  nospace:
  	EXT2_UNLOCK(ump);
 @@ -150,7 +149,6 @@ nospace:
   * the previous block allocation will be used.
   */
  
 -#ifdef FANCY_REALLOC
  static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
  
  static int doasyncfree = 1;
 @@ -159,7 +157,6 @@ SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyn
  
  static int doreallocblks = 1;
  SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
 -#endif
  
  int
  ext2_reallocblks(ap)
 @@ -168,11 +165,6 @@ ext2_reallocblks(ap)
  		struct cluster_save *a_buflist;
  	} */ *ap;
  {
 -#ifndef FANCY_REALLOC
 -/* printf("ext2_reallocblks not implemented\n"); */
 -return ENOSPC;
 -#else
 -
  	struct m_ext2fs *fs;
  	struct inode *ip;
  	struct vnode *vp;
 @@ -184,14 +176,17 @@ return ENOSPC;
  	int32_t start_lbn, end_lbn, soff, newblk, blkno;
  	int i, len, start_lvl, end_lvl, pref, ssize;
  
 +	if (doreallocblks == 0)
 +		  return (ENOSPC);
 +
  	vp = ap->a_vp;
  	ip = VTOI(vp);
  	fs = ip->i_e2fs;
  	ump = ip->i_ump;
 -#ifdef UNKLAR
 -	if (fs->fs_contigsumsize <= 0)
 +
 +	if (fs->e2fs_contigsumsize <= 0)
  		return (ENOSPC);
 -#endif
 +
  	buflist = ap->a_buflist;
  	len = buflist->bs_nchildren;
  	start_lbn = buflist->bs_children[0]->b_lblkno;
 @@ -228,11 +223,6 @@ return ENOSPC;
  		soff = idp->in_off;
  	}
  	/*
 -	 * Find the preferred location for the cluster.
 -	 */
 -	EXT2_LOCK(ump);
 -	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
 -	/*
  	 * If the block range spans two block maps, get the second map.
  	 */
  	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
 @@ -243,13 +233,16 @@ return ENOSPC;
  			panic("ext2_reallocblk: start == end");
  #endif
  		ssize = len - (idp->in_off + 1);
 -		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
 -			EXT2_UNLOCK(ump);	
 +		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
  			goto fail;
 -		}
  		ebap = (int32_t *)ebp->b_data;
  	}
  	/*
 +	 * Find the preferred location for the cluster.
 +	 */
 +	EXT2_LOCK(ump);
 +	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
 +	/*
  	 * Search the block map looking for an allocation of the desired size.
  	 */
  	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
 @@ -264,15 +257,23 @@ return ENOSPC;
  	 * block pointers in the inode and indirect blocks associated
  	 * with the file.
  	 */
 +#ifdef DEBUG
 +	printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
 +	    (intmax_t)start_lbn, (intmax_t)end_lbn);
 +#endif /* DEBUG */
  	blkno = newblk;
  	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
 -		if (i == ssize)
 +		if (i == ssize) {
  			bap = ebap;
  			soff = -i;
 +		}
  #ifdef DIAGNOSTIC
  		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
  			panic("ext2_reallocblks: alloc mismatch");
  #endif
 +#ifdef DEBUG
 +	printf(" %d,", *bap);
 +#endif /* DEBUG */
  		*bap++ = blkno;
  	}
  	/*
 @@ -308,11 +309,20 @@ return ENOSPC;
  	/*
  	 * Last, free the old blocks and assign the new blocks to the buffers.
  	 */
 +#ifdef DEBUG
 +	printf("\n\tnew:");
 +#endif /* DEBUG */
  	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
  		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
  		    fs->e2fs_bsize);
  		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
 -	}
 +#ifdef DEBUG
 +		printf(" %d,", blkno);
 +#endif /* DEBUG */
 +	}
 +#ifdef DEBUG
 +	printf("\n");
 +#endif /* DEBUG */
  	return (0);
  
  fail:
 @@ -321,8 +331,6 @@ fail:
  	if (sbap != &ip->i_db[0])
  		brelse(sbp);
  	return (ENOSPC);
 -
 -#endif /* FANCY_REALLOC */
  }
  
  /*
 @@ -747,6 +755,7 @@ gotit:
  #endif
  	setbit(bbp, bno);
  	EXT2_LOCK(ump);
 +	ext2_clusteracct(fs, bbp, cg, bno, -1);
  	fs->e2fs->e2fs_fbcount--;
  	fs->e2fs_gd[cg].ext2bgd_nbfree--;
  	fs->e2fs_fmod = 1;
 @@ -756,6 +765,116 @@ gotit:
  }
  
  /*
 + * Determine whether a cluster can be allocated.
 + */
 +static daddr_t
 +ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
 +{
 +	struct m_ext2fs *fs;
 +	struct ext2mount *ump;
 +	struct buf *bp;
 +	char *bbp;
 +	int bit, error, got, i, loc, run;
 +	int32_t *lp;
 +	daddr_t bno;
 +
 +	fs = ip->i_e2fs;
 +	ump = ip->i_ump;
 +
 +	if (fs->e2fs_maxcluster[cg] < len)
 +		return (0);
 +
 +	EXT2_UNLOCK(ump);
 +	error = bread(ip->i_devvp,
 +	    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
 +	    (int)fs->e2fs_bsize, NOCRED, &bp);
 +	if (error)
 +		goto fail_lock;
 +
 +	bbp = (char *)bp->b_data;
 +	bp->b_xflags |= BX_BKGRDWRITE;
 +
 +	EXT2_LOCK(ump);
 +	/*
 +	 * Check to see if a cluster of the needed size (or bigger) is
 +	 * available in this cylinder group.
 +	 */
 +	lp = &fs->e2fs_clustersum[cg].cs_sum[len];
 +	for (i = len; i <= fs->e2fs_contigsumsize; i++)
 +		if (*lp++ > 0)
 +			break;
 +	if (i > fs->e2fs_contigsumsize) {
 +		/*
 +		 * Update the cluster summary information to reflect
 +		 * the true maximum-sized cluster so that future cluster
 +		 * allocation requests can avoid reading the bitmap only
 +		 * to find no cluster.
 +		 */
 +		lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
 +			for (i = len - 1; i > 0; i--)
 +				if (*lp-- > 0)
 +					break;
 +		fs->e2fs_maxcluster[cg] = i;
 +		goto fail;
 +	}
 +	EXT2_UNLOCK(ump);
 +
 +	/* Search the bitmap to find a big enough cluster like in FFS. */
 +	if (dtog(fs, bpref) != cg)
 +		bpref = 0;
 +	if (bpref != 0)
 +		bpref = dtogd(fs, bpref);
 +	loc = bpref / NBBY;
 +	bit = 1 << (bpref % NBBY);
 +	for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) {
 +		if ((bbp[loc] & bit) != 0)
 +			run = 0;
 +		else {
 +			run++;
 +			if (run == len)
 +				break;
 +		}
 +		if ((got & (NBBY - 1)) != (NBBY - 1))
 +			bit <<= 1;
 +		else {
 +			loc++;
 +			bit = 1;
 +		}
 +	}
 +
 +	if (got >= fs->e2fs->e2fs_fpg)
 +		goto fail_lock;
 +
 +	/* Allocate the cluster that we found. */
 +	for (i = 1; i < len; i++)
 +		if (!isclr(bbp, got - run + i))
 +			panic("ext2_clusteralloc: map mismatch");
 +
 +	bno = got - run + 1;
 +	if (bno >= fs->e2fs->e2fs_fpg)
 +		panic("ext2_clusteralloc: allocated out of group");
 +
 +	EXT2_LOCK(ump);
 +	for (i = 0; i < len; i += fs->e2fs_fpb) {
 +		setbit(bbp, bno + i);
 +		ext2_clusteracct(fs, bbp, cg, bno + i, -1);
 +		fs->e2fs->e2fs_fbcount--;
 +		fs->e2fs_gd[cg].ext2bgd_nbfree--;
 +	}
 +	fs->e2fs_fmod = 1;
 +	EXT2_UNLOCK(ump);
 +
 +	bdwrite(bp);
 +	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
 +
 +fail_lock:
 +	EXT2_LOCK(ump);
 +fail:
 +	brelse(bp);
 +	return (0);
 +}
 +
 +/*
   * Determine whether an inode can be allocated.
   *
   * Check to see if an inode is available, and if it is,
 @@ -877,6 +996,7 @@ ext2_blkfree(ip, bno, size)
          }
          clrbit(bbp, bno);
  	EXT2_LOCK(ump);
 +	ext2_clusteracct(fs, bbp, cg, bno, 1);
          fs->e2fs->e2fs_fbcount++;
          fs->e2fs_gd[cg].ext2bgd_nbfree++;
          fs->e2fs_fmod = 1;
 
 Modified: head/sys/fs/ext2fs/ext2_extern.h
 ==============================================================================
 --- head/sys/fs/ext2fs/ext2_extern.h	Thu Dec 15 20:27:36 2011	(r228538)
 +++ head/sys/fs/ext2fs/ext2_extern.h	Thu Dec 15 20:31:18 2011	(r228539)
 @@ -55,12 +55,13 @@ void	ext2_blkfree(struct inode *, int32_
  int32_t	ext2_blkpref(struct inode *, int32_t, int, int32_t *, int32_t);
  int	ext2_bmap(struct vop_bmap_args *);
  int	ext2_bmaparray(struct vnode *, int32_t, int32_t *, int *, int *);
 +void	ext2_clusteracct(struct m_ext2fs *, char *, int, daddr_t, int);
  void	ext2_dirbad(struct inode *ip, doff_t offset, char *how);
  void	ext2_ei2i(struct ext2fs_dinode *, struct inode *);
  int	ext2_getlbns(struct vnode *, int32_t, struct indir *, int *);
  void	ext2_i2ei(struct inode *, struct ext2fs_dinode *);
 +int     ext2_reallocblks(struct vop_reallocblks_args *);
  void	ext2_itimes(struct vnode *vp);
 -int	ext2_reallocblks(struct vop_reallocblks_args *);
  int	ext2_reclaim(struct vop_reclaim_args *);
  void	ext2_setblock(struct m_ext2fs *, u_char *, int32_t);
  int	ext2_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
 
 Modified: head/sys/fs/ext2fs/ext2_subr.c
 ==============================================================================
 --- head/sys/fs/ext2fs/ext2_subr.c	Thu Dec 15 20:27:36 2011	(r228538)
 +++ head/sys/fs/ext2fs/ext2_subr.c	Thu Dec 15 20:31:18 2011	(r228539)
 @@ -120,3 +120,107 @@ ext2_checkoverlap(bp, ip)
  	}
  }
  #endif /* KDB */
 +
 +/*
 + * Update the cluster map because of an allocation or free, like FFS.
 + *
 + * Cnt == 1 means free; cnt == -1 means allocating.
 + */
 +void
 +ext2_clusteracct(struct m_ext2fs *fs, char *bbp, int cg, daddr_t bno, int cnt)
 +{
 +	int32_t *sump = fs->e2fs_clustersum[cg].cs_sum;
 +	int32_t *lp;
 +	int back, bit, end, forw, i, loc, start;
 +
 +	/* Initialize the cluster summary array. */
 +	if (fs->e2fs_clustersum[cg].cs_init == 0) {
 +		int run = 0;
 +		bit = 1;
 +		loc = 0;
 +
 +		for (i = 0; i < fs->e2fs->e2fs_fpg; i++) {
 +			if ((bbp[loc] & bit) == 0)
 +				run++;
 +			else if (run != 0) {
 +				if (run > fs->e2fs_contigsumsize)
 +					run = fs->e2fs_contigsumsize;
 +				sump[run]++;
 +				run = 0;
 +			}
 +			if ((i & (NBBY - 1)) != (NBBY - 1))
 +				bit <<= 1;
 +			else {
 +				loc++;
 +				bit = 1;
 +			}
 +		}
 +		if (run != 0) {
 +			if (run > fs->e2fs_contigsumsize)
 +				run = fs->e2fs_contigsumsize;
 +			sump[run]++;
 +		}
 +		fs->e2fs_clustersum[cg].cs_init = 1;
 +	}
 +
 +	if (fs->e2fs_contigsumsize <= 0)
 +		return;
 +
 +	/* Find the size of the cluster going forward. */
 +	start = bno + 1;
 +	end = start + fs->e2fs_contigsumsize;
 +	if (end > fs->e2fs->e2fs_fpg)
 +		end = fs->e2fs->e2fs_fpg;
 +	loc = start / NBBY;
 +	bit = 1 << (start % NBBY);
 +	for (i = start; i < end; i++) {
 +		if ((bbp[loc] & bit) != 0)
 +			break;
 +		if ((i & (NBBY - 1)) != (NBBY - 1))
 +			bit <<= 1;
 +		else {
 +			loc++;
 +			bit = 1;
 +		}
 +	}
 +	forw = i - start;
 +
 +	/* Find the size of the cluster going backward. */
 +	start = bno - 1;
 +	end = start - fs->e2fs_contigsumsize;
 +	if (end < 0)
 +		end = -1;
 +	loc = start / NBBY;
 +	bit = 1 << (start % NBBY);
 +	for (i = start; i > end; i--) {
 +		if ((bbp[loc] & bit) != 0)
 +			break;
 +		if ((i & (NBBY - 1)) != 0)
 +			bit >>= 1;
 +		else {
 +			loc--;
 +			bit = 1 << (NBBY - 1);
 +		}
 +	}
 +	back = start - i;
 +
 +	/*
 +	 * Account for old cluster and the possibly new forward and
 +	 * back clusters.
 +	 */
 +	i = back + forw + 1;
 +	if (i > fs->e2fs_contigsumsize)
 +		i = fs->e2fs_contigsumsize;
 +	sump[i] += cnt;
 +	if (back > 0)
 +		sump[back] -= cnt;
 +	if (forw > 0)
 +		sump[forw] -= cnt;
 +
 +	/* Update cluster summary information. */
 +	lp = &sump[fs->e2fs_contigsumsize];
 +	for (i = fs->e2fs_contigsumsize; i > 0; i--)
 +		if (*lp-- > 0)
 +			break;
 +	fs->e2fs_maxcluster[cg] = i;
 +}
 
 Modified: head/sys/fs/ext2fs/ext2_vfsops.c
 ==============================================================================
 --- head/sys/fs/ext2fs/ext2_vfsops.c	Thu Dec 15 20:27:36 2011	(r228538)
 +++ head/sys/fs/ext2fs/ext2_vfsops.c	Thu Dec 15 20:31:18 2011	(r228539)
 @@ -405,7 +405,7 @@ compute_sb_data(struct vnode *devvp, str
   * Things to do to update the mount:
   *	1) invalidate all cached meta-data.
   *	2) re-read superblock from disk.
 - *	3) re-read summary information from disk.
 + *	3) invalidate all cluster summary information.
   *	4) invalidate all inactive vnodes.
   *	5) invalidate all cached file data.
   *	6) re-read inode data for all active vnodes.
 @@ -419,7 +419,9 @@ ext2_reload(struct mount *mp, struct thr
  	struct buf *bp;
  	struct ext2fs *es;
  	struct m_ext2fs *fs;
 -	int error;
 +	struct csum *sump;
 +	int error, i;
 +	int32_t *lp;
  
  	if ((mp->mnt_flag & MNT_RDONLY) == 0)
  		return (EINVAL);
 @@ -456,6 +458,19 @@ ext2_reload(struct mount *mp, struct thr
  #endif
  	brelse(bp);
  
 +	/*
 +	 * Step 3: invalidate all cluster summary information.
 +	 */
 +	if (fs->e2fs_contigsumsize > 0) {
 +		lp = fs->e2fs_maxcluster;
 +		sump = fs->e2fs_clustersum;
 +		for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
 +			*lp++ = fs->e2fs_contigsumsize;
 +			sump->cs_init = 0;
 +			bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
 +		}
 +	}
 +
  loop:
  	MNT_ILOCK(mp);
  	MNT_VNODE_FOREACH(vp, mp, mvp) {
 @@ -511,8 +526,11 @@ ext2_mountfs(struct vnode *devvp, struct
  	struct cdev *dev = devvp->v_rdev;
  	struct g_consumer *cp;
  	struct bufobj *bo;
 +	struct csum *sump;
  	int error;
  	int ronly;
 +	int i, size;
 +	int32_t *lp;
  
  	ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
  	/* XXX: use VOP_ACESS to check FS perms */
 @@ -582,6 +600,33 @@ ext2_mountfs(struct vnode *devvp, struct
  	if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
  		goto out;
  
 +	/*
 +	 * Calculate the maximum contiguous blocks and size of cluster summary
 +	 * array.  In FFS this is done by newfs; however, the superblock
 +	 * in ext2fs doesn't have these variables, so we can calculate
 +	 * them here.
 +	 */
 +	ump->um_e2fs->e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize);
 +	if (ump->um_e2fs->e2fs_maxcontig > 0)
 +		ump->um_e2fs->e2fs_contigsumsize =
 +		    MIN(ump->um_e2fs->e2fs_maxcontig, EXT2_MAXCONTIG);
 +	else
 +		ump->um_e2fs->e2fs_contigsumsize = 0;
 +	if (ump->um_e2fs->e2fs_contigsumsize > 0) {
 +		size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
 +		ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
 +		size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
 +		ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
 +		lp = ump->um_e2fs->e2fs_maxcluster;
 +		sump = ump->um_e2fs->e2fs_clustersum;
 +		for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
 +			*lp++ = ump->um_e2fs->e2fs_contigsumsize;
 +			sump->cs_init = 0;
 +			sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
 +			    sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
 +		}
 +	}
 +
  	brelse(bp);
  	bp = NULL;
  	fs = ump->um_e2fs;
 @@ -656,7 +701,8 @@ ext2_unmount(struct mount *mp, int mntfl
  {
  	struct ext2mount *ump;
  	struct m_ext2fs *fs;
 -	int error, flags, ronly;
 +	struct csum *sump;
 +	int error, flags, i, ronly;
  
  	flags = 0;
  	if (mntflags & MNT_FORCE) {
 @@ -681,6 +727,11 @@ ext2_unmount(struct mount *mp, int mntfl
  	g_topology_unlock();
  	PICKUP_GIANT();
  	vrele(ump->um_devvp);
 +	sump = fs->e2fs_clustersum;
 +	for (i = 0; i < fs->e2fs_gcount; i++, sump++)
 +		free(sump->cs_sum, M_EXT2MNT);
 +	free(fs->e2fs_clustersum, M_EXT2MNT);
 +	free(fs->e2fs_maxcluster, M_EXT2MNT);
  	free(fs->e2fs_gd, M_EXT2MNT);
  	free(fs->e2fs_contigdirs, M_EXT2MNT);
  	free(fs->e2fs, M_EXT2MNT);
 
 Modified: head/sys/fs/ext2fs/ext2fs.h
 ==============================================================================
 --- head/sys/fs/ext2fs/ext2fs.h	Thu Dec 15 20:27:36 2011	(r228538)
 +++ head/sys/fs/ext2fs/ext2fs.h	Thu Dec 15 20:31:18 2011	(r228539)
 @@ -45,6 +45,16 @@
  #define EXT2_LINK_MAX		32000
  
  /*
 + * A summary of contiguous blocks of various sizes is maintained
 + * in each cylinder group. Normally this is set by the initial
 + * value of fs_maxcontig.
 + *
 + * XXX:FS_MAXCONTIG is set to 16 to conserve space. Here we set
 + * EXT2_MAXCONTIG to 32 for better performance.
 + */
 +#define EXT2_MAXCONTIG		32
 +
 +/*
   * Constants relative to the data blocks
   */
  #define	EXT2_NDIR_BLOCKS		12
 @@ -151,6 +161,10 @@ struct m_ext2fs {
  	char     e2fs_wasvalid;   /* valid at mount time */
  	off_t    e2fs_maxfilesize;
  	struct   ext2_gd *e2fs_gd; /* Group Descriptors */
 +	int32_t  e2fs_maxcontig;        /* max number of contiguous blks */
 +	int32_t  e2fs_contigsumsize;    /* size of cluster summary array */
 +	int32_t *e2fs_maxcluster;       /* max cluster in each cyl group */
 +	struct   csum *e2fs_clustersum; /* cluster summary in each cyl group */
  };
  
  /*
 @@ -253,6 +267,13 @@ struct ext2_gd {
  	uint32_t reserved2[3];
  };
  
 +/* cluster summary information */
 +
 +struct csum {
 +	int8_t   cs_init; /* cluster summary has been initialized */
 +	int32_t *cs_sum;  /* cluster summary array */
 +};
 +
  /* EXT2FS metadatas are stored in little-endian byte order. These macros
   * helps reading these metadatas
   */
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
State-Changed-From-To: open->patched 
State-Changed-By: pfg 
State-Changed-When: Thu Dec 15 20:46:06 UTC 2011 
State-Changed-Why:  
Committed to head. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=159233 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/159233: commit references a PR
Date: Thu,  5 Jan 2012 01:35:24 +0000 (UTC)

 Author: pfg
 Date: Thu Jan  5 01:35:01 2012
 New Revision: 229549
 URL: http://svn.freebsd.org/changeset/base/229549
 
 Log:
   MFC:	r228507, r228539, r228583
   
   Merge ext2_readwrite.c into ext2_vnops.c as done in UFS.
   Bring in reallocblk to ext2fs: new feature implemented by Zheng Liu as
   GSoC 2010.
   Many style fixes by jh@.
   
   PR:		159232, 159233 and 162564
   Approved by:	jhb (mentor)
 
 Deleted:
   stable/9/sys/fs/ext2fs/ext2_readwrite.c
 Modified:
   stable/9/sys/fs/ext2fs/ext2_alloc.c
   stable/9/sys/fs/ext2fs/ext2_balloc.c
   stable/9/sys/fs/ext2fs/ext2_bmap.c
   stable/9/sys/fs/ext2fs/ext2_extern.h
   stable/9/sys/fs/ext2fs/ext2_inode.c
   stable/9/sys/fs/ext2fs/ext2_lookup.c
   stable/9/sys/fs/ext2fs/ext2_subr.c
   stable/9/sys/fs/ext2fs/ext2_vfsops.c
   stable/9/sys/fs/ext2fs/ext2_vnops.c
   stable/9/sys/fs/ext2fs/ext2fs.h
   stable/9/sys/fs/ext2fs/fs.h
 Directory Properties:
   stable/9/sys/   (props changed)
 
 Modified: stable/9/sys/fs/ext2fs/ext2_alloc.c
 ==============================================================================
 --- stable/9/sys/fs/ext2fs/ext2_alloc.c	Thu Jan  5 01:25:47 2012	(r229548)
 +++ stable/9/sys/fs/ext2fs/ext2_alloc.c	Thu Jan  5 01:35:01 2012	(r229549)
 @@ -42,6 +42,7 @@
  #include <sys/vnode.h>
  #include <sys/stat.h>
  #include <sys/mount.h>
 +#include <sys/sysctl.h>
  #include <sys/syslog.h>
  #include <sys/buf.h>
  
 @@ -52,6 +53,7 @@
  #include <fs/ext2fs/ext2_extern.h>
  
  static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
 +static daddr_t	ext2_clusteralloc(struct inode *, int, daddr_t, int);
  static u_long	ext2_dirpref(struct inode *);
  static void	ext2_fserr(struct m_ext2fs *, uid_t, char *);
  static u_long	ext2_hashalloc(struct inode *, int, long, int,
 @@ -59,9 +61,6 @@ static u_long	ext2_hashalloc(struct inod
  						int));
  static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
  static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
 -#ifdef FANCY_REALLOC
 -static int	ext2_reallocblks(struct vop_reallocblks_args *);
 -#endif
  
  /*
   * Allocate a block in the file system.
 @@ -113,20 +112,20 @@ ext2_alloc(ip, lbn, bpref, size, cred, b
  	if (bpref >= fs->e2fs->e2fs_bcount)
  		bpref = 0;
  	if (bpref == 0)
 -                cg = ino_to_cg(fs, ip->i_number);
 -        else
 -                cg = dtog(fs, bpref);
 -        bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
 -                                                 ext2_alloccg);
 -        if (bno > 0) {
 +		cg = ino_to_cg(fs, ip->i_number);
 +	else
 +		cg = dtog(fs, bpref);
 +	bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
 +				      ext2_alloccg);
 +	if (bno > 0) {
  		/* set next_alloc fields as done in block_getblk */
  		ip->i_next_alloc_block = lbn;
  		ip->i_next_alloc_goal = bno;
  
 -                ip->i_blocks += btodb(fs->e2fs_bsize);
 -                ip->i_flag |= IN_CHANGE | IN_UPDATE;
 -                *bnp = bno;
 -                return (0);
 +		ip->i_blocks += btodb(fs->e2fs_bsize);
 +		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 +		*bnp = bno;
 +		return (0);
          }
  nospace:
  	EXT2_UNLOCK(ump);
 @@ -150,7 +149,6 @@ nospace:
   * the previous block allocation will be used.
   */
  
 -#ifdef FANCY_REALLOC
  SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
  
  static int doasyncfree = 1;
 @@ -159,7 +157,6 @@ SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyn
  
  static int doreallocblks = 1;
  SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
 -#endif
  
  int
  ext2_reallocblks(ap)
 @@ -168,11 +165,6 @@ ext2_reallocblks(ap)
  		struct cluster_save *a_buflist;
  	} */ *ap;
  {
 -#ifndef FANCY_REALLOC
 -/* printf("ext2_reallocblks not implemented\n"); */
 -return ENOSPC;
 -#else
 -
  	struct m_ext2fs *fs;
  	struct inode *ip;
  	struct vnode *vp;
 @@ -184,14 +176,17 @@ return ENOSPC;
  	int32_t start_lbn, end_lbn, soff, newblk, blkno;
  	int i, len, start_lvl, end_lvl, pref, ssize;
  
 +	if (doreallocblks == 0)
 +		  return (ENOSPC);
 +
  	vp = ap->a_vp;
  	ip = VTOI(vp);
  	fs = ip->i_e2fs;
  	ump = ip->i_ump;
 -#ifdef UNKLAR
 -	if (fs->fs_contigsumsize <= 0)
 +
 +	if (fs->e2fs_contigsumsize <= 0)
  		return (ENOSPC);
 -#endif
 +
  	buflist = ap->a_buflist;
  	len = buflist->bs_nchildren;
  	start_lbn = buflist->bs_children[0]->b_lblkno;
 @@ -228,11 +223,6 @@ return ENOSPC;
  		soff = idp->in_off;
  	}
  	/*
 -	 * Find the preferred location for the cluster.
 -	 */
 -	EXT2_LOCK(ump);
 -	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
 -	/*
  	 * If the block range spans two block maps, get the second map.
  	 */
  	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
 @@ -243,13 +233,16 @@ return ENOSPC;
  			panic("ext2_reallocblk: start == end");
  #endif
  		ssize = len - (idp->in_off + 1);
 -		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){
 -			EXT2_UNLOCK(ump);	
 +		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
  			goto fail;
 -		}
  		ebap = (int32_t *)ebp->b_data;
  	}
  	/*
 +	 * Find the preferred location for the cluster.
 +	 */
 +	EXT2_LOCK(ump);
 +	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
 +	/*
  	 * Search the block map looking for an allocation of the desired size.
  	 */
  	if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
 @@ -264,15 +257,23 @@ return ENOSPC;
  	 * block pointers in the inode and indirect blocks associated
  	 * with the file.
  	 */
 +#ifdef DEBUG
 +	printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
 +	    (intmax_t)start_lbn, (intmax_t)end_lbn);
 +#endif /* DEBUG */
  	blkno = newblk;
  	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
 -		if (i == ssize)
 +		if (i == ssize) {
  			bap = ebap;
  			soff = -i;
 +		}
  #ifdef DIAGNOSTIC
  		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
  			panic("ext2_reallocblks: alloc mismatch");
  #endif
 +#ifdef DEBUG
 +	printf(" %d,", *bap);
 +#endif /* DEBUG */
  		*bap++ = blkno;
  	}
  	/*
 @@ -308,11 +309,20 @@ return ENOSPC;
  	/*
  	 * Last, free the old blocks and assign the new blocks to the buffers.
  	 */
 +#ifdef DEBUG
 +	printf("\n\tnew:");
 +#endif /* DEBUG */
  	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
  		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
  		    fs->e2fs_bsize);
  		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
 -	}
 +#ifdef DEBUG
 +		printf(" %d,", blkno);
 +#endif /* DEBUG */
 +	}
 +#ifdef DEBUG
 +	printf("\n");
 +#endif /* DEBUG */
  	return (0);
  
  fail:
 @@ -321,8 +331,6 @@ fail:
  	if (sbap != &ip->i_db[0])
  		brelse(sbp);
  	return (ENOSPC);
 -
 -#endif /* FANCY_REALLOC */
  }
  
  /*
 @@ -356,7 +364,7 @@ ext2_valloc(pvp, mode, cred, vpp)
  	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
  	 * always the next inode.
  	 */
 -	if((mode & IFMT) == IFDIR) {
 +	if ((mode & IFMT) == IFDIR) {
  		cg = ext2_dirpref(pip);
  		if (fs->e2fs_contigdirs[cg] < 255)
  			fs->e2fs_contigdirs[cg]++;
 @@ -549,13 +557,13 @@ ext2_blkpref(ip, lbn, indx, bap, blocknr
  	/* if the next block is actually what we thought it is,
  	   then set the goal to what we thought it should be
  	*/
 -	if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
 +	if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
  		return ip->i_next_alloc_goal;
  
  	/* now check whether we were provided with an array that basically
  	   tells us previous blocks to which we want to stay closeby
  	*/
 -	if(bap) 
 +	if (bap) 
                  for (tmp = indx - 1; tmp >= 0; tmp--) 
  			if (bap[tmp]) 
  				return bap[tmp];
 @@ -747,6 +755,7 @@ gotit:
  #endif
  	setbit(bbp, bno);
  	EXT2_LOCK(ump);
 +	ext2_clusteracct(fs, bbp, cg, bno, -1);
  	fs->e2fs->e2fs_fbcount--;
  	fs->e2fs_gd[cg].ext2bgd_nbfree--;
  	fs->e2fs_fmod = 1;
 @@ -756,6 +765,116 @@ gotit:
  }
  
  /*
 + * Determine whether a cluster can be allocated.
 + */
 +static daddr_t
 +ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
 +{
 +	struct m_ext2fs *fs;
 +	struct ext2mount *ump;
 +	struct buf *bp;
 +	char *bbp;
 +	int bit, error, got, i, loc, run;
 +	int32_t *lp;
 +	daddr_t bno;
 +
 +	fs = ip->i_e2fs;
 +	ump = ip->i_ump;
 +
 +	if (fs->e2fs_maxcluster[cg] < len)
 +		return (0);
 +
 +	EXT2_UNLOCK(ump);
 +	error = bread(ip->i_devvp,
 +	    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
 +	    (int)fs->e2fs_bsize, NOCRED, &bp);
 +	if (error)
 +		goto fail_lock;
 +
 +	bbp = (char *)bp->b_data;
 +	bp->b_xflags |= BX_BKGRDWRITE;
 +
 +	EXT2_LOCK(ump);
 +	/*
 +	 * Check to see if a cluster of the needed size (or bigger) is
 +	 * available in this cylinder group.
 +	 */
 +	lp = &fs->e2fs_clustersum[cg].cs_sum[len];
 +	for (i = len; i <= fs->e2fs_contigsumsize; i++)
 +		if (*lp++ > 0)
 +			break;
 +	if (i > fs->e2fs_contigsumsize) {
 +		/*
 +		 * Update the cluster summary information to reflect
 +		 * the true maximum-sized cluster so that future cluster
 +		 * allocation requests can avoid reading the bitmap only
 +		 * to find no cluster.
 +		 */
 +		lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
 +			for (i = len - 1; i > 0; i--)
 +				if (*lp-- > 0)
 +					break;
 +		fs->e2fs_maxcluster[cg] = i;
 +		goto fail;
 +	}
 +	EXT2_UNLOCK(ump);
 +
 +	/* Search the bitmap to find a big enough cluster like in FFS. */
 +	if (dtog(fs, bpref) != cg)
 +		bpref = 0;
 +	if (bpref != 0)
 +		bpref = dtogd(fs, bpref);
 +	loc = bpref / NBBY;
 +	bit = 1 << (bpref % NBBY);
 +	for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) {
 +		if ((bbp[loc] & bit) != 0)
 +			run = 0;
 +		else {
 +			run++;
 +			if (run == len)
 +				break;
 +		}
 +		if ((got & (NBBY - 1)) != (NBBY - 1))
 +			bit <<= 1;
 +		else {
 +			loc++;
 +			bit = 1;
 +		}
 +	}
 +
 +	if (got >= fs->e2fs->e2fs_fpg)
 +		goto fail_lock;
 +
 +	/* Allocate the cluster that we found. */
 +	for (i = 1; i < len; i++)
 +		if (!isclr(bbp, got - run + i))
 +			panic("ext2_clusteralloc: map mismatch");
 +
 +	bno = got - run + 1;
 +	if (bno >= fs->e2fs->e2fs_fpg)
 +		panic("ext2_clusteralloc: allocated out of group");
 +
 +	EXT2_LOCK(ump);
 +	for (i = 0; i < len; i += fs->e2fs_fpb) {
 +		setbit(bbp, bno + i);
 +		ext2_clusteracct(fs, bbp, cg, bno + i, -1);
 +		fs->e2fs->e2fs_fbcount--;
 +		fs->e2fs_gd[cg].ext2bgd_nbfree--;
 +	}
 +	fs->e2fs_fmod = 1;
 +	EXT2_UNLOCK(ump);
 +
 +	bdwrite(bp);
 +	return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno);
 +
 +fail_lock:
 +	EXT2_LOCK(ump);
 +fail:
 +	brelse(bp);
 +	return (0);
 +}
 +
 +/*
   * Determine whether an inode can be allocated.
   *
   * Check to see if an inode is available, and if it is,
 @@ -877,6 +996,7 @@ ext2_blkfree(ip, bno, size)
          }
          clrbit(bbp, bno);
  	EXT2_LOCK(ump);
 +	ext2_clusteracct(fs, bbp, cg, bno, 1);
          fs->e2fs->e2fs_fbcount++;
          fs->e2fs_gd[cg].ext2bgd_nbfree++;
          fs->e2fs_fmod = 1;
 
 Modified: stable/9/sys/fs/ext2fs/ext2_balloc.c
 ==============================================================================
 --- stable/9/sys/fs/ext2fs/ext2_balloc.c	Thu Jan  5 01:25:47 2012	(r229548)
 +++ stable/9/sys/fs/ext2fs/ext2_balloc.c	Thu Jan  5 01:35:01 2012	(r229549)
 @@ -156,7 +156,7 @@ ext2_balloc(ip, lbn, size, cred, bpp, fl
  	 */
  	pref = 0;
  	if ((error = ext2_getlbns(vp, lbn, indirs, &num)) != 0)
 -		return(error);
 +		return (error);
  #ifdef DIAGNOSTIC
  	if (num < 1)
  		panic ("ext2_balloc: ext2_getlbns returned indirect block");
 
 Modified: stable/9/sys/fs/ext2fs/ext2_bmap.c
 ==============================================================================
 --- stable/9/sys/fs/ext2fs/ext2_bmap.c	Thu Jan  5 01:25:47 2012	(r229548)
 +++ stable/9/sys/fs/ext2fs/ext2_bmap.c	Thu Jan  5 01:35:01 2012	(r229549)
 @@ -215,7 +215,7 @@ ext2_bmaparray(vp, bn, bnp, runp, runb)
  			    ++bn, ++*runp);
  			bn = ap->in_off;
  			if (runb && bn) {
 -				for(--bn; bn >= 0 && *runb < maxrun &&
 +				for (--bn; bn >= 0 && *runb < maxrun &&
  			    		is_sequential(ump, ((int32_t *)bp->b_data)[bn],
  					    ((int32_t *)bp->b_data)[bn+1]);
  			    		--bn, ++*runb);
 
 Modified: stable/9/sys/fs/ext2fs/ext2_extern.h
 ==============================================================================
 --- stable/9/sys/fs/ext2fs/ext2_extern.h	Thu Jan  5 01:25:47 2012	(r229548)
 +++ stable/9/sys/fs/ext2fs/ext2_extern.h	Thu Jan  5 01:35:01 2012	(r229549)
 @@ -55,12 +55,13 @@ void	ext2_blkfree(struct inode *, int32_
  int32_t	ext2_blkpref(struct inode *, int32_t, int, int32_t *, int32_t);
  int	ext2_bmap(struct vop_bmap_args *);
  int	ext2_bmaparray(struct vnode *, int32_t, int32_t *, int *, int *);
 +void	ext2_clusteracct(struct m_ext2fs *, char *, int, daddr_t, int);
  void	ext2_dirbad(struct inode *ip, doff_t offset, char *how);
  void	ext2_ei2i(struct ext2fs_dinode *, struct inode *);
  int	ext2_getlbns(struct vnode *, int32_t, struct indir *, int *);
  void	ext2_i2ei(struct inode *, struct ext2fs_dinode *);
 +int     ext2_reallocblks(struct vop_reallocblks_args *);
  void	ext2_itimes(struct vnode *vp);
 -int	ext2_reallocblks(struct vop_reallocblks_args *);
  int	ext2_reclaim(struct vop_reclaim_args *);
  void	ext2_setblock(struct m_ext2fs *, u_char *, int32_t);
  int	ext2_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
 
 Modified: stable/9/sys/fs/ext2fs/ext2_inode.c
 ==============================================================================
 --- stable/9/sys/fs/ext2fs/ext2_inode.c	Thu Jan  5 01:25:47 2012	(r229548)
 +++ stable/9/sys/fs/ext2fs/ext2_inode.c	Thu Jan  5 01:35:01 2012	(r229549)
 @@ -229,7 +229,7 @@ ext2_truncate(vp, length, flags, cred, t
  	 * will be returned to the free list.  lastiblock values are also
  	 * normalized to -1 for calls to ext2_indirtrunc below.
  	 */
 -	bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks);
 +	bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof(oldblks));
  	for (level = TRIPLE; level >= SINGLE; level--)
  		if (lastiblock[level] < 0) {
  			oip->i_ib[level] = 0;
 @@ -246,8 +246,8 @@ ext2_truncate(vp, length, flags, cred, t
  	 * Note that we save the new block configuration so we can check it
  	 * when we are done.
  	 */
 -	bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks);
 -	bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks);
 +	bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof(newblks));
 +	bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof(oldblks));
  	oip->i_size = osize;
  	error = vtruncbuf(ovp, cred, td, length, (int)fs->e2fs_bsize);
  	if (error && (allerror == 0))
 @@ -418,7 +418,7 @@ ext2_indirtrunc(ip, lbn, dbn, lastbn, le
  	copy = malloc(fs->e2fs_bsize, M_TEMP, M_WAITOK);
  	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->e2fs_bsize);
  	bzero((caddr_t)&bap[last + 1],
 -	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (int32_t));
 +	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof(int32_t));
  	if (last == -1)
  		bp->b_flags |= B_INVAL;
  	error = bwrite(bp);
 
 Modified: stable/9/sys/fs/ext2fs/ext2_lookup.c
 ==============================================================================
 --- stable/9/sys/fs/ext2fs/ext2_lookup.c	Thu Jan  5 01:25:47 2012	(r229548)
 +++ stable/9/sys/fs/ext2fs/ext2_lookup.c	Thu Jan  5 01:35:01 2012	(r229549)
 @@ -1002,7 +1002,7 @@ ext2_dirempty(ip, parentino, cred)
  	struct dirtemplate dbuf;
  	struct ext2fs_direct_2 *dp = (struct ext2fs_direct_2 *)&dbuf;
  	int error, count, namlen;
 -#define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
 +#define	MINDIRSIZ (sizeof(struct dirtemplate) / 2)
  
  	for (off = 0; off < ip->i_size; off += dp->e2d_reclen) {
  		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ,
 @@ -1070,7 +1070,7 @@ ext2_checkpath(source, target, cred)
  			break;
  		}
  		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
 -			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
 +			sizeof(struct dirtemplate), (off_t)0, UIO_SYSSPACE,
  			IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL,
  			NULL);
  		if (error != 0)
 
 Modified: stable/9/sys/fs/ext2fs/ext2_subr.c
 ==============================================================================
 --- stable/9/sys/fs/ext2fs/ext2_subr.c	Thu Jan  5 01:25:47 2012	(r229548)
 +++ stable/9/sys/fs/ext2fs/ext2_subr.c	Thu Jan  5 01:35:01 2012	(r229549)
 @@ -120,3 +120,107 @@ ext2_checkoverlap(bp, ip)
  	}
  }
  #endif /* KDB */
 +
 +/*
 + * Update the cluster map because of an allocation or free, like ffs.
 + *
 + * Cnt == 1 means free; cnt == -1 means allocating.
 + */
 +void
 +ext2_clusteracct(struct m_ext2fs *fs, char *bbp, int cg, daddr_t bno, int cnt)
 +{
 +	int32_t *sump = fs->e2fs_clustersum[cg].cs_sum;
 +	int32_t *lp;
 +	int back, bit, end, forw, i, loc, start;
 +
 +	/* Initialize the cluster summary array. */
 +	if (fs->e2fs_clustersum[cg].cs_init == 0) {
 +		int run = 0;
 +		bit = 1;
 +		loc = 0;
 +
 +		for (i = 0; i < fs->e2fs->e2fs_fpg; i++) {
 +			if ((bbp[loc] & bit) == 0)
 +				run++;
 +			else if (run != 0) {
 +				if (run > fs->e2fs_contigsumsize)
 +					run = fs->e2fs_contigsumsize;
 +				sump[run]++;
 +				run = 0;
 +			}
 +			if ((i & (NBBY - 1)) != (NBBY - 1))
 +				bit <<= 1;
 +			else {
 +				loc++;
 +				bit = 1;
 +			}
 +		}
 +		if (run != 0) {
 +			if (run > fs->e2fs_contigsumsize)
 +				run = fs->e2fs_contigsumsize;
 +			sump[run]++;
 +		}
 +		fs->e2fs_clustersum[cg].cs_init = 1;
 +	}
 +
 +	if (fs->e2fs_contigsumsize <= 0)
 +		return;
 +
 +	/* Find the size of the cluster going forward. */
 +	start = bno + 1;
 +	end = start + fs->e2fs_contigsumsize;
 +	if (end > fs->e2fs->e2fs_fpg)
 +		end = fs->e2fs->e2fs_fpg;
 +	loc = start / NBBY;
 +	bit = 1 << (start % NBBY);
 +	for (i = start; i < end; i++) {
 +		if ((bbp[loc] & bit) != 0)
 +			break;
 +		if ((i & (NBBY - 1)) != (NBBY - 1))
 +			bit <<= 1;
 +		else {
 +			loc++;
 +			bit = 1;
 +		}
 +	}
 +	forw = i - start;
 +
 +	/* Find the size of the cluster going backward. */
 +	start = bno - 1;
 +	end = start - fs->e2fs_contigsumsize;
 +	if (end < 0)
 +		end = -1;
 +	loc = start / NBBY;
 +	bit = 1 << (start % NBBY);
 +	for (i = start; i > end; i--) {
 +		if ((bbp[loc] & bit) != 0)
 +			break;
 +		if ((i & (NBBY - 1)) != 0)
 +			bit >>= 1;
 +		else {
 +			loc--;
 +			bit = 1 << (NBBY - 1);
 +		}
 +	}
 +	back = start - i;
 +
 +	/*
 +	 * Account for old cluster and the possibly new forward and
 +	 * back clusters.
 +	 */
 +	i = back + forw + 1;
 +	if (i > fs->e2fs_contigsumsize)
 +		i = fs->e2fs_contigsumsize;
 +	sump[i] += cnt;
 +	if (back > 0)
 +		sump[back] -= cnt;
 +	if (forw > 0)
 +		sump[forw] -= cnt;
 +
 +	/* Update cluster summary information. */
 +	lp = &sump[fs->e2fs_contigsumsize];
 +	for (i = fs->e2fs_contigsumsize; i > 0; i--)
 +		if (*lp-- > 0)
 +			break;
 +	fs->e2fs_maxcluster[cg] = i;
 +}
 
 Modified: stable/9/sys/fs/ext2fs/ext2_vfsops.c
 ==============================================================================
 --- stable/9/sys/fs/ext2fs/ext2_vfsops.c	Thu Jan  5 01:25:47 2012	(r229548)
 +++ stable/9/sys/fs/ext2fs/ext2_vfsops.c	Thu Jan  5 01:35:01 2012	(r229549)
 @@ -349,7 +349,7 @@ compute_sb_data(struct vnode *devvp, str
  	}
  	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs);
  	fs->e2fs_itpg = fs->e2fs_ipg /fs->e2fs_ipb;
 -	fs->e2fs_descpb = fs->e2fs_bsize / sizeof (struct ext2_gd);
 +	fs->e2fs_descpb = fs->e2fs_bsize / sizeof(struct ext2_gd);
  	/* s_resuid / s_resgid ? */
  	fs->e2fs_gcount = (es->e2fs_bcount - es->e2fs_first_dblock +
  	    EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs);
 @@ -405,7 +405,7 @@ compute_sb_data(struct vnode *devvp, str
   * Things to do to update the mount:
   *	1) invalidate all cached meta-data.
   *	2) re-read superblock from disk.
 - *	3) re-read summary information from disk.
 + *	3) invalidate all cluster summary information.
   *	4) invalidate all inactive vnodes.
   *	5) invalidate all cached file data.
   *	6) re-read inode data for all active vnodes.
 @@ -419,7 +419,9 @@ ext2_reload(struct mount *mp, struct thr
  	struct buf *bp;
  	struct ext2fs *es;
  	struct m_ext2fs *fs;
 -	int error;
 +	struct csum *sump;
 +	int error, i;
 +	int32_t *lp;
  
  	if ((mp->mnt_flag & MNT_RDONLY) == 0)
  		return (EINVAL);
 @@ -456,6 +458,19 @@ ext2_reload(struct mount *mp, struct thr
  #endif
  	brelse(bp);
  
 +	/*
 +	 * Step 3: invalidate all cluster summary information.
 +	 */
 +	if (fs->e2fs_contigsumsize > 0) {
 +		lp = fs->e2fs_maxcluster;
 +		sump = fs->e2fs_clustersum;
 +		for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
 +			*lp++ = fs->e2fs_contigsumsize;
 +			sump->cs_init = 0;
 +			bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
 +		}
 +	}
 +
  loop:
  	MNT_ILOCK(mp);
  	MNT_VNODE_FOREACH(vp, mp, mvp) {
 @@ -511,8 +526,11 @@ ext2_mountfs(struct vnode *devvp, struct
  	struct cdev *dev = devvp->v_rdev;
  	struct g_consumer *cp;
  	struct bufobj *bo;
 +	struct csum *sump;
  	int error;
  	int ronly;
 +	int i, size;
 +	int32_t *lp;
  
  	ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
  	/* XXX: use VOP_ACESS to check FS perms */
 @@ -565,8 +583,7 @@ ext2_mountfs(struct vnode *devvp, struct
  			goto out;
  		}
  	}
 -	ump = malloc(sizeof *ump, M_EXT2MNT, M_WAITOK);
 -	bzero((caddr_t)ump, sizeof *ump);
 +	ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO);
  
  	/*
  	 * I don't know whether this is the right strategy. Note that
 @@ -582,6 +599,33 @@ ext2_mountfs(struct vnode *devvp, struct
  	if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
  		goto out;
  
 +	/*
 +	 * Calculate the maximum contiguous blocks and size of cluster summary
 +	 * array.  In FFS this is done by newfs; however, the superblock 
 +	 * in ext2fs doesn't have these variables, so we can calculate 
 +	 * them here.
 +	 */
 +	ump->um_e2fs->e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize);
 +	if (ump->um_e2fs->e2fs_maxcontig > 0)
 +		ump->um_e2fs->e2fs_contigsumsize =
 +		    MIN(ump->um_e2fs->e2fs_maxcontig, EXT2_MAXCONTIG);
 +	else
 +		ump->um_e2fs->e2fs_contigsumsize = 0;
 +	if (ump->um_e2fs->e2fs_contigsumsize > 0) {
 +		size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
 +		ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
 +		size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
 +		ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
 +		lp = ump->um_e2fs->e2fs_maxcluster;
 +		sump = ump->um_e2fs->e2fs_clustersum;
 +		for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
 +			*lp++ = ump->um_e2fs->e2fs_contigsumsize;
 +			sump->cs_init = 0;
 +			sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
 +			    sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
 +		}
 +	}
 +
  	brelse(bp);
  	bp = NULL;
  	fs = ump->um_e2fs;
 @@ -656,7 +700,8 @@ ext2_unmount(struct mount *mp, int mntfl
  {
  	struct ext2mount *ump;
  	struct m_ext2fs *fs;
 -	int error, flags, ronly;
 +	struct csum *sump;
 +	int error, flags, i, ronly;
  
  	flags = 0;
  	if (mntflags & MNT_FORCE) {
 @@ -681,6 +726,11 @@ ext2_unmount(struct mount *mp, int mntfl
  	g_topology_unlock();
  	PICKUP_GIANT();
  	vrele(ump->um_devvp);
 +	sump = fs->e2fs_clustersum;
 +	for (i = 0; i < fs->e2fs_gcount; i++, sump++)
 +		free(sump->cs_sum, M_EXT2MNT);
 +	free(fs->e2fs_clustersum, M_EXT2MNT);
 +	free(fs->e2fs_maxcluster, M_EXT2MNT);
  	free(fs->e2fs_gd, M_EXT2MNT);
  	free(fs->e2fs_contigdirs, M_EXT2MNT);
  	free(fs->e2fs, M_EXT2MNT);
 @@ -927,7 +977,7 @@ ext2_vget(struct mount *mp, ino_t ino, i
  	 */
  	if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) {
  		used_blocks = (ip->i_size+fs->e2fs_bsize-1) / fs->e2fs_bsize;
 -		for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
 +		for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
  			ip->i_db[i] = 0;
  	}
  /*
 
 Modified: stable/9/sys/fs/ext2fs/ext2_vnops.c
 ==============================================================================
 --- stable/9/sys/fs/ext2fs/ext2_vnops.c	Thu Jan  5 01:25:47 2012	(r229548)
 +++ stable/9/sys/fs/ext2fs/ext2_vnops.c	Thu Jan  5 01:35:01 2012	(r229549)
 @@ -64,9 +64,13 @@
  #include <sys/file.h>
  
  #include <vm/vm.h>
 +#include <vm/vm_page.h>
 +#include <vm/vm_object.h>
  #include <vm/vm_extern.h>
  #include <vm/vnode_pager.h>
  
 +#include "opt_directio.h"
 +
  #include <fs/fifofs/fifo.h>
  
  #include <ufs/ufs/dir.h>
 @@ -159,8 +163,6 @@ struct vop_vector ext2_fifoops = {
  	.vop_vptofh =		ext2_vptofh,
  };
  
 -#include <fs/ext2fs/ext2_readwrite.c>
 -
  /*
   * A virgin directory (no blushing please).
   * Note that the type and namlen fields are reversed relative to ext2.
 @@ -1078,7 +1080,7 @@ abortit:
  			dp->i_nlink--;
  			dp->i_flag |= IN_CHANGE;
  			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
 -				sizeof (struct dirtemplate), (off_t)0,
 +				sizeof(struct dirtemplate), (off_t)0,
  				UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
  				tcnp->cn_cred, NOCRED, NULL, NULL);
  			if (error == 0) {
 @@ -1093,7 +1095,7 @@ abortit:
  					dirbuf.dotdot_ino = newparent;
  					(void) vn_rdwr(UIO_WRITE, fvp,
  					    (caddr_t)&dirbuf,
 -					    sizeof (struct dirtemplate),
 +					    sizeof(struct dirtemplate),
  					    (off_t)0, UIO_SYSSPACE,
  					    IO_NODELOCKED | IO_SYNC |
  					    IO_NOMACCHECK, tcnp->cn_cred,
 @@ -1231,7 +1233,7 @@ ext2_mkdir(ap)
  #define DIRBLKSIZ  VTOI(dvp)->i_e2fs->e2fs_bsize
  	dirtemplate.dotdot_reclen = DIRBLKSIZ - 12;
  	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
 -	    sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
 +	    sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE,
  	    IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED,
  	    NULL, NULL);
  	if (error) {
 @@ -1675,3 +1677,328 @@ bad:
  	vput(tvp);
  	return (error);
  }
 +
 +/*
 + * Vnode op for reading.
 + *
 + * Copies data from the file behind ap->a_vp into ap->a_uio, one file
 + * system block per loop iteration, until the request is satisfied
 + * (uio_resid reaches 0) or the end of the file (i_size) is reached.
 + *
 + * Returns 0 right away for a zero-length request, EOVERFLOW when the
 + * starting offset lies inside the file but at or beyond
 + * e2fs_maxfilesize, and otherwise the first error reported by the
 + * buffer read or by uiomove(), or 0 on success.
 + *
 + * Unless the mount has MNT_NOATIME set, the inode is flagged IN_ACCESS
 + * whenever the read succeeded or moved at least some data.
 + */
 +static int
 +ext2_read(ap)
 +	struct vop_read_args /* {
 +		struct vnode *a_vp;
 +		struct uio *a_uio;
 +		int a_ioflag;
 +		struct ucred *a_cred;
 +	} */ *ap;
 +{
 +	struct vnode *vp;
 +	struct inode *ip;
 +	struct uio *uio;
 +	struct m_ext2fs *fs;
 +	struct buf *bp;
 +	daddr_t lbn, nextlbn;
 +	off_t bytesinfile;
 +	long size, xfersize, blkoffset;
 +	int error, orig_resid, seqcount;
 +	int ioflag;
 +
 +	vp = ap->a_vp;
 +	uio = ap->a_uio;
 +	ioflag = ap->a_ioflag;
 +
 +	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
 +	ip = VTOI(vp);
 +	/*
 +	 * Sanity checks, compiled in only with INVARIANTS: the uio must
 +	 * describe a read, and the vnode must be a regular file, a
 +	 * directory, or a symlink whose size is not below
 +	 * mnt_maxsymlinklen.  Any violation panics.
 +	 */
 +#ifdef INVARIANTS
 +	if (uio->uio_rw != UIO_READ)
 +		panic("%s: mode", "ext2_read");
 +
 +	if (vp->v_type == VLNK) {
 +		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
 +			panic("%s: short symlink", "ext2_read");
 +	} else if (vp->v_type != VREG && vp->v_type != VDIR)
 +		panic("%s: type %d", "ext2_read", vp->v_type);
 +#endif
 +	orig_resid = uio->uio_resid;
 +	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
 +	if (orig_resid == 0)
 +		return (0);
 +	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
 +	fs = ip->i_e2fs;
 +	if (uio->uio_offset < ip->i_size &&
 +	    uio->uio_offset >= fs->e2fs_maxfilesize)
 +	    	return (EOVERFLOW);
 +	/*
 +	 * Transfer at most one block per pass.  The for-clause resets bp
 +	 * to NULL after every completed pass, so bp is only non-NULL
 +	 * after the loop when a 'break' left it that way.
 +	 */
 +	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 +		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
 +			break;
 +		lbn = lblkno(fs, uio->uio_offset);
 +		nextlbn = lbn + 1;
 +		size = blksize(fs, ip, lbn);
 +		blkoffset = blkoff(fs, uio->uio_offset);
 +		/*
 +		 * Clamp the transfer to what is left in the block, in the
 +		 * request and in the file.
 +		 * NOTE(review): the block remainder is computed from
 +		 * e2fs_fsize while the buffer is sized by blksize();
 +		 * presumably the two agree for ext2 -- confirm.
 +		 */
 +		xfersize = fs->e2fs_fsize - blkoffset;
 +		if (uio->uio_resid < xfersize)
 +			xfersize = uio->uio_resid;
 +		if (bytesinfile < xfersize)
 +			xfersize = bytesinfile;
 +		/*
 +		 * Pick the read primitive: a plain bread() when
 +		 * lblktosize() of the following block is at or beyond
 +		 * i_size; cluster_read() unless the mount sets
 +		 * MNT_NOCLUSTERR; otherwise breadn() with the following
 +		 * block as read-ahead when seqcount > 1, else bread().
 +		 */
 +		if (lblktosize(fs, nextlbn) >= ip->i_size)
 +			error = bread(vp, lbn, size, NOCRED, &bp);
 +		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
 +			error = cluster_read(vp, ip->i_size, lbn, size,
 +			    NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
 +		else if (seqcount > 1) {
 +			int nextsize = blksize(fs, ip, nextlbn);
 +			error = breadn(vp, lbn,
 +			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
 +		} else
 +			error = bread(vp, lbn, size, NOCRED, &bp);
 +		if (error) {
 +			brelse(bp);
 +			bp = NULL;
 +			break;
 +		}
 +
 +		/*
 +		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
 +		 * will cause us to attempt to release the buffer later on
 +		 * and will cause the buffer cache to attempt to free the
 +		 * underlying pages.
 +		 */
 +		if (ioflag & IO_DIRECT)
 +			bp->b_flags |= B_DIRECT;
 +
 +		/*
 +		 * We should only get non-zero b_resid when an I/O error
 +		 * has occurred, which should cause us to break above.
 +		 * However, if the short read did not cause an error,
 +		 * then we want to ensure that we do not uiomove bad
 +		 * or uninitialized data.
 +		 *
 +		 * A short read that delivered no data at all ends the
 +		 * loop with error still 0 and bp still held; the buffer
 +		 * is released after the loop.
 +		 */
 +		size -= bp->b_resid;
 +		if (size < xfersize) {
 +			if (size == 0)
 +				break;
 +			xfersize = size;
 +		}
 +		error = uiomove((char *)bp->b_data + blkoffset,
 +  			(int)xfersize, uio);
 +		if (error)
 +			break;
 +
 +		if (ioflag & (IO_VMIO|IO_DIRECT)) {
 +			/*
 +			 * If it's VMIO or direct I/O, then we don't
 +			 * need the buf, mark it available for
 +			 * freeing. If it's non-direct VMIO, the VM has
 +			 * the data.
 +			 */
 +			bp->b_flags |= B_RELBUF;
 +			brelse(bp);
 +		} else {
 +			/*
 +			 * Otherwise let whoever
 +			 * made the request take care of
 +			 * freeing it. We just queue
 +			 * it onto another list.
 +			 */
 +			bqrelse(bp);
 +		}
 +	}
 +
 +	/*
 +	 * A non-NULL bp here means the loop was left through a 'break'
 +	 * while a buffer was still held: either the short read above
 +	 * delivered no data, or uiomove() failed.  Every completed pass
 +	 * releases its buffer and the for-clause then resets bp to NULL,
 +	 * and the read-error path clears bp itself, so no other exit
 +	 * leaves a buffer behind.  Release it the same way the loop
 +	 * body would have.
 +	 */
 +	if (bp != NULL) {
 +		if (ioflag & (IO_VMIO|IO_DIRECT)) {
 +			bp->b_flags |= B_RELBUF;
 +			brelse(bp);
 +		} else {
 +			bqrelse(bp);
 +		}
 +	}
 +
 +	if ((error == 0 || uio->uio_resid != orig_resid) &&
 +	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
 +		ip->i_flag |= IN_ACCESS;
 +	return (error);
 +}
 +
 +/*
 + * Vnode op for writing.
 + */
 +static int
 +ext2_write(ap)
 +	struct vop_write_args /* {
 +		struct vnode *a_vp;
 +		struct uio *a_uio;
 +		int a_ioflag;
 +		struct ucred *a_cred;
 +	} */ *ap;
 +{
 +	struct vnode *vp;
 +	struct uio *uio;
 +	struct inode *ip;
 +	struct m_ext2fs *fs;
 +	struct buf *bp;
 +	daddr_t lbn;
 +	off_t osize;
 +	int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;
 +
 +	ioflag = ap->a_ioflag;
 +	uio = ap->a_uio;
 +	vp = ap->a_vp;
 +
 +	seqcount = ioflag >> IO_SEQSHIFT;
 +	ip = VTOI(vp);
 +
 +#ifdef INVARIANTS
 +	if (uio->uio_rw != UIO_WRITE)
 +		panic("%s: mode", "ext2_write");
 +#endif
 +
 +	switch (vp->v_type) {
 +	case VREG:
 +		if (ioflag & IO_APPEND)
 +			uio->uio_offset = ip->i_size;
 +		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
 +			return (EPERM);
 +		/* FALLTHROUGH */
 +	case VLNK:
 +		break;
 +	case VDIR:
 +		/* XXX differs from ffs -- this is called from ext2_mkdir(). */
 +		if ((ioflag & IO_SYNC) == 0)
 +		panic("ext2_write: nonsync dir write");
 +		break;
 +	default:
 +		panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp,
 +		    vp->v_type, (intmax_t)uio->uio_offset,
 +		    (intmax_t)uio->uio_resid);
 +	}
 +
 +	KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0"));
 +	KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0"));
 +	fs = ip->i_e2fs;
 +	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize)
 +		return (EFBIG);
 +	/*
 +	 * Maybe this should be above the vnode op call, but so long as
 +	 * file servers have no limits, I don't think it matters.
 +	 */
 +	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
 +		return (EFBIG);
 +
 +	resid = uio->uio_resid;
 +	osize = ip->i_size;
 +	if (seqcount > BA_SEQMAX)
 +		flags = BA_SEQMAX << BA_SEQSHIFT;
 +	else
 +		flags = seqcount << BA_SEQSHIFT;
 +	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
 +		flags |= IO_SYNC;
 +
 +	for (error = 0; uio->uio_resid > 0;) {
 +		lbn = lblkno(fs, uio->uio_offset);
 +		blkoffset = blkoff(fs, uio->uio_offset);
 +		xfersize = fs->e2fs_fsize - blkoffset;
 +		if (uio->uio_resid < xfersize)
 +			xfersize = uio->uio_resid;
 +		if (uio->uio_offset + xfersize > ip->i_size)
 +			vnode_pager_setsize(vp, uio->uio_offset + xfersize);
 +
 +                /*
 +		 * We must perform a read-before-write if the transfer size
 +		 * does not cover the entire buffer.
 +                 */
 +		if (fs->e2fs_bsize > xfersize)
 +			flags |= BA_CLRBUF;
 +		else
 +			flags &= ~BA_CLRBUF;
 +		error = ext2_balloc(ip, lbn, blkoffset + xfersize,
 +		    ap->a_cred, &bp, flags);
 +		if (error != 0)
 +			break;
 +
 +		/*
 +		 * If the buffer is not valid and we did not clear garbage
 +		 * out above, we have to do so here even though the write
 +		 * covers the entire buffer in order to avoid a mmap()/write
 +		 * race where another process may see the garbage prior to
 +		 * the uiomove() for a write replacing it.
 
 *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
State-Changed-From-To: patched->closed 
State-Changed-By: pfg 
State-Changed-When: Thu Jan 5 01:52:22 UTC 2012 
State-Changed-Why:  
Committed and MFC'd. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=159233 
>Unformatted:
