From chris@gouda.netmonger.net Mon Nov  8 11:55:52 1999
Return-Path: <chris@gouda.netmonger.net>
Received: from gouda.netmonger.net (gouda.netmonger.net [167.206.208.2])
	by hub.freebsd.org (Postfix) with ESMTP id 3383114BF7
	for <FreeBSD-gnats-submit@freebsd.org>; Mon,  8 Nov 1999 11:55:46 -0800 (PST)
	(envelope-from chris@gouda.netmonger.net)
Received: (from chris@localhost)
	by gouda.netmonger.net (8.9.3/8.9.3) id OAA11086;
	Mon, 8 Nov 1999 14:55:41 -0500 (EST)
	(envelope-from chris)
Message-Id: <199911081955.OAA11086@gouda.netmonger.net>
Date: Mon, 8 Nov 1999 14:55:41 -0500 (EST)
From: chris@netmonger.net
Sender: chris@gouda.netmonger.net
Reply-To: chris@netmonger.net
To: FreeBSD-gnats-submit@freebsd.org
Subject: tail breaks on large files
X-Send-Pr-Version: 3.2

>Number:         14786
>Category:       bin
>Synopsis:       [PATCH] tail breaks on large files
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    dwmalone
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Mon Nov  8 12:00:01 PST 1999
>Closed-Date:    Mon Apr 23 07:18:42 PDT 2001
>Last-Modified:  Mon Apr 23 07:19:02 PDT 2001
>Originator:     Christopher Masto
>Release:        FreeBSD 4.0-CURRENT i386
>Organization:
>Environment:


>Description:

As documented in its man page, mmap() doesn't work on files larger than 2GB.

The following code is in /usr/src/usr.bin/tail/forward.c:

        if ((start = mmap(NULL, (size_t)size,
            PROT_READ, MAP_SHARED, fileno(fp), (off_t)0)) == MAP_FAILED) {
                ierr();
                return;
        }

Unfortunately, after returning from the rlines function, which contains
that code, tail attempts to display any data that has come in "since
we read the file".  In this case mmap() failed, so the file has not
been read, and tail spits out the entire contents of the file.

>How-To-Repeat:

$ tail <file-larger-than-2GB>

>Fix:
	
Probably one of:

  * Detect sizes > mmap()'s limit and exit with an appropriate message.
  * Dump out when mmap() fails.
  * Use a more complicated algorithm that can handle large files.

>Release-Note:
>Audit-Trail:

From: David Malone <dwmalone@maths.tcd.ie>
To: freebsd-gnats-submit@FreeBSD.org, chris@netmonger.net
Cc:  
Subject: Re: bin/14786: tail breaks on large files
Date: Tue, 11 Jan 2000 13:39:17 +0000

 I've produced a patch which works by mapping 4MB chunks of the file
 at a time, instead of trying to map the whole file. It seems to work
 reasonably well, and seems to produce the same results as the original
 version of tail - except on large files, where the original fails and
 the patched version works correctly.
 
 "tail -r" also suffers from similar problems, which I could produce
 a patch for if people think this is the correct way to do things.
 
 	David.
 
 --- forward.c.orig	Mon Dec 13 09:40:31 1999
 +++ forward.c	Mon Dec 13 01:11:42 1999
 @@ -207,9 +207,11 @@
  	long off;
  	struct stat *sbp;
  {
 -	register off_t size;
 +	register off_t size, curoff;
  	register char *p;
  	char *start;
 +	off_t mapoff;
 +	size_t maplen;
  
  	if (!(size = sbp->st_size))
  		return;
 @@ -220,27 +222,59 @@
  		return;
  	}
  
 -	if ((start = mmap(NULL, (size_t)size,
 -	    PROT_READ, MAP_SHARED, fileno(fp), (off_t)0)) == MAP_FAILED) {
 -		ierr();
 -		return;
 +	start = NULL;
 +	for (curoff = size - 1, mapoff = size; curoff >= 0; curoff--) {
 +		if (curoff < mapoff) {
 +			if (start && munmap(start, maplen)) {
 +				ierr();
 +				return;
 +			}
 +			mapoff = curoff & (~((4<<20)-1));
 +			maplen = curoff - mapoff + 1;
 +			if ((start = mmap(NULL, maplen, PROT_READ,
 +			    MAP_SHARED, fileno(fp), mapoff)) == MAP_FAILED) {
 +				ierr();
 +				return;
 +			}
 +		}
 +		p = start + (curoff - mapoff);
 +		/* Last char is special, ignore whether newline or not. */
 +		if (*p == '\n' && curoff != size -1 && !--off) {
 +			curoff++;
 +			break;
 +		}
  	}
  
 -	/* Last char is special, ignore whether newline or not. */
 -	for (p = start + size - 1; --size;)
 -		if (*--p == '\n' && !--off) {
 -			++p;
 -			break;
 +	if (curoff < 0)
 +		curoff = 0;
 +
 +	while (curoff != size) {
 +		if (curoff < mapoff || curoff >= mapoff + maplen) {
 +			if (start && munmap(start, maplen)) {
 +				ierr();
 +				return;
 +			}
 +			mapoff = curoff & (~((4<<20)-1));
 +			maplen = 4<<20;
 +			if (mapoff + maplen > size)
 +				maplen = size - mapoff;
 +			if ((start = mmap(NULL, maplen, PROT_READ,
 +			    MAP_SHARED, fileno(fp), mapoff)) == MAP_FAILED) {
 +				ierr();
 +				return;
 +			}
  		}
 +		p = start + (curoff - mapoff);
 +		WR(p, maplen - (curoff - mapoff));
 +		curoff += maplen - (curoff - mapoff);
 +	}
  
  	/* Set the file pointer to reflect the length displayed. */
 -	size = sbp->st_size - size;
 -	WR(p, size);
  	if (fseek(fp, (long)sbp->st_size, SEEK_SET) == -1) {
  		ierr();
  		return;
  	}
 -	if (munmap(start, (size_t)sbp->st_size)) {
 +	if (start && munmap(start, maplen)) {
  		ierr();
  		return;
  	}
 
Responsible-Changed-From-To: freebsd-bugs->dwmalone 
Responsible-Changed-By: dwmalone 
Responsible-Changed-When: Tue Jul 11 04:45:34 PDT 2000 
Responsible-Changed-Why:  
Of interest to me. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=14786 

From: David Malone <dwmalone@maths.tcd.ie>
To: Bryan Heitman <bryanh@communitech.net>
Cc: freebsd-gnats-submit@FreeBSD.org
Subject: Re: bin/14786: [PATCH] tail breaks on large files
Date: Sat, 16 Dec 2000 12:38:22 +0000

 > I've tested this with the patched forward.c file which i have attached and
 > it's still not working.
 
 It was a stupid mistake - I'd fixed the mmap problem, but left in a
 check that the file had to be smaller than 2GB! Try the following;
 it seemed to work on a 13GB file for me.
 
 	David.
 
 
 Index: forward.c
 ===================================================================
 RCS file: /cvs/FreeBSD-CVS/src/usr.bin/tail/forward.c,v
 retrieving revision 1.16
 diff -u -r1.16 forward.c
 --- forward.c	2000/12/02 19:10:12	1.16
 +++ forward.c	2000/12/16 12:30:32
 @@ -269,40 +269,68 @@
  	long off;
  	struct stat *sbp;
  {
 -	register off_t size;
 +	off_t size, curoff;
  	register char *p;
  	char *start;
 +	off_t mapoff;
 +	size_t maplen;
  
  	if (!(size = sbp->st_size))
  		return;
  
 -	if (size > SIZE_T_MAX) {
 -		errno = EFBIG;
 -		ierr();
 -		return;
 +	start = NULL;
 +	for (curoff = size - 1, mapoff = size; curoff >= 0; curoff--) {
 +		if (curoff < mapoff) {
 +			if (start && munmap(start, maplen)) {
 +				ierr();
 +				return;
 +			}
 +			mapoff = curoff & (~((4<<20)-1));
 +			maplen = curoff - mapoff + 1;
 +			if ((start = mmap(NULL, maplen, PROT_READ,
 +			    MAP_SHARED, fileno(fp), mapoff)) == MAP_FAILED) {
 +				ierr();
 +				return;
 +			}
 +		}
 +		p = start + (curoff - mapoff);
 +		/* Last char is special, ignore whether newline or not. */
 +		if (*p == '\n' && curoff != size -1 && !--off) {
 +			curoff++;
 +			break;
 +		}
  	}
  
 -	if ((start = mmap(NULL, (size_t)size,
 -	    PROT_READ, MAP_SHARED, fileno(fp), (off_t)0)) == MAP_FAILED) {
 -		ierr();
 -		return;
 -	}
 +	if (curoff < 0)
 +		curoff = 0;
  
 -	/* Last char is special, ignore whether newline or not. */
 -	for (p = start + size - 1; --size;)
 -		if (*--p == '\n' && !--off) {
 -			++p;
 -			break;
 +	while (curoff != size) {
 +		if (curoff < mapoff || curoff >= mapoff + maplen) {
 +			if (start && munmap(start, maplen)) {
 +				ierr();
 +				return;
 +			}
 +			mapoff = curoff & (~((4<<20)-1));
 +			maplen = 4<<20;
 +			if (mapoff + maplen > size)
 +				maplen = size - mapoff;
 +			if ((start = mmap(NULL, maplen, PROT_READ,
 +			    MAP_SHARED, fileno(fp), mapoff)) == MAP_FAILED) {
 +				ierr();
 +				return;
 +			}
  		}
 +		p = start + (curoff - mapoff);
 +		WR(p, maplen - (curoff - mapoff));
 +		curoff += maplen - (curoff - mapoff);
 +	}
  
  	/* Set the file pointer to reflect the length displayed. */
 -	size = sbp->st_size - size;
 -	WR(p, size);
 -	if (fseek(fp, (long)sbp->st_size, SEEK_SET) == -1) {
 +	if (fseeko(fp, sbp->st_size, SEEK_SET) == -1) {
  		ierr();
  		return;
  	}
 -	if (munmap(start, (size_t)sbp->st_size)) {
 +	if (start != NULL && munmap(start, maplen)) {
  		ierr();
  		return;
  	}
 
State-Changed-From-To: open->closed 
State-Changed-By: dwmalone 
State-Changed-When: Mon Apr 23 07:18:42 PDT 2001 
State-Changed-Why:  
Fixed in -current and 4.3. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=14786 
>Unformatted:
