From nobody@FreeBSD.org  Sat Feb 13 04:45:31 2010
Return-Path: <nobody@FreeBSD.org>
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 6EAFF106566C
	for <freebsd-gnats-submit@FreeBSD.org>; Sat, 13 Feb 2010 04:45:31 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (www.freebsd.org [IPv6:2001:4f8:fff6::21])
	by mx1.freebsd.org (Postfix) with ESMTP id 600C58FC0C
	for <freebsd-gnats-submit@FreeBSD.org>; Sat, 13 Feb 2010 04:45:31 +0000 (UTC)
Received: from www.freebsd.org (localhost [127.0.0.1])
	by www.freebsd.org (8.14.3/8.14.3) with ESMTP id o1D4jVva075184
	for <freebsd-gnats-submit@FreeBSD.org>; Sat, 13 Feb 2010 04:45:31 GMT
	(envelope-from nobody@www.freebsd.org)
Received: (from nobody@localhost)
	by www.freebsd.org (8.14.3/8.14.3/Submit) id o1D4jUKm075183;
	Sat, 13 Feb 2010 04:45:31 GMT
	(envelope-from nobody)
Message-Id: <201002130445.o1D4jUKm075183@www.freebsd.org>
Date: Sat, 13 Feb 2010 04:45:31 GMT
From: Guy Harris <guy@alum.mit.edu>
To: freebsd-gnats-submit@FreeBSD.org
Subject: non-blocking BPF reads return -1/EWOULDBLOCK until the store buffer fills up
X-Send-Pr-Version: www-3.1
X-GNATS-Notify:

>Number:         143855
>Category:       kern
>Synopsis:       [bpf] [patch] non-blocking BPF reads return -1/EWOULDBLOCK until the store buffer fills up
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    jkim
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Sat Feb 13 04:50:01 UTC 2010
>Closed-Date:    Sat Feb 20 03:24:59 UTC 2010
>Last-Modified:  Mon Mar 22 20:30:10 UTC 2010
>Originator:     Guy Harris
>Release:        7.0-RELEASE
>Organization:
>Environment:
FreeBSD gharris-freebsd.localdomain 7.0-RELEASE FreeBSD 7.0-Release #0: Sun Feb 24 19:59:52 UTC 2000     root@logan.cse.buffalo.edu:/usr/obj/usr/src/sys/GENERIC  i386
>Description:
Non-blocking reads from a BPF device not in immediate mode will not rotate the buffers even if there's data in the store buffer but not in the hold buffer, so, until the store buffer fills up, the reads will return -1 and set errno to EWOULDBLOCK.
>How-To-Repeat:
Compile the following (C++) program, run it on an adapter with the filter "icmp" on a reasonably quiet network, and then, on the same machine, ping some host such that the pings go out on that same network.  Note that the program doesn't report any packets having been seen.

#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <net/bpf.h>
#include <pcap.h>

// Forward declarations of the helpers that main() calls; their definitions
// follow later in this program.
// create():  opens and configures a capture session, returns NULL on failure.
pcap_t* create(const std::string& name, 
			   const std::string& pcapFilter, 
			   uint32_t snapLen, bool promisc);
// capture(): polls the session in a loop, returns only when a pcap call fails.
bool capture(pcap_t * pcapSession);
// close():   releases the session if it is non-null.
void close(pcap_t* pcapSession);


int main(int argc, char** argv)
{
	if (argc != 3)
	{
		std::cerr << "Usage: libpcaptest <interface> <filter>" 
               << std::endl;
		return 1;
	}
	
	std::string name(argv[1]), filter(argv[2]);
	std::cout << "Capturing from '" << name << " with filter "
       << filter << std::endl;
	 
	pcap_t * pcapSession = create(name, filter, 128, true);
	capture(pcapSession);
	close(pcapSession);
	return 0;
}

/**
Per-packet callback passed to pcap_dispatch.
It does not look at any of its arguments; it only reports on stdout
that a packet was delivered.
**/
void test_pcap_handler(u_char* user, const struct pcap_pkthdr* header, 
					   const u_char* pkt_data)
{
	// None of the callback arguments are inspected.
	(void)user;
	(void)header;
	(void)pkt_data;

	static const char notice[] = "Packet captured";
	std::cout << notice << std::endl;
}

/**
Produces a writable, NUL-terminated copy of pcapFilter in dupFilter
(any previous contents of dupFilter are discarded).

Temporary workaround: on Windows the filter string taken by pcap_compile
was not declared as 'const char*', so a mutable buffer has to be supplied.
**/
void duplicateFilterString(const std::string& pcapFilter, 
    std::vector<char>& dupFilter)
{
	// Copy the characters, then append the terminating NUL.
	dupFilter.assign(pcapFilter.begin(), pcapFilter.end());
	dupFilter.push_back('\0');
}

/**
Releases a capture session via pcap_close.
A null session is accepted and ignored.
**/
void close(pcap_t* pcapSession)
{
	if (pcapSession == NULL)
		return;

	pcap_close(pcapSession);
}
		
		
/**
Opens and configures a live capture session.

name        interface to capture on
pcapFilter  pcap filter expression (may be empty)
snapLen     snapshot length in bytes handed to pcap_open_live
promisc     true to open the interface in promiscuous mode

Steps: pcap_open_live with a 1000 ms read timeout; compile and install the
filter when one was supplied or snapLen is below 65535; switch the session
to non-blocking mode; print the link-layer type on stderr.

Returns the open session, or NULL after printing the reason on stderr.
On every failure path the session is closed before returning.
**/
pcap_t* create(const std::string& name, 
			   const std::string& pcapFilter, 
			   uint32_t snapLen, bool promisc)
{
	char errbuf[PCAP_ERRBUF_SIZE];
	// Start from an empty message so stale stack contents are never printed.
	errbuf[0] = '\0';
	pcap_t* pcapSession;
		
	if ((pcapSession = pcap_open_live(name.c_str(), 
             snapLen, promisc ? 1 : 0, 1000,	errbuf)) == NULL)
	{
		std::cerr << "Failed pcap_open_live because <"
                <<errbuf<<">" << std::endl;
		return NULL;
	}
			
	// compile the filter if it's been supplied or snapLen is provided
	if (pcapFilter.empty()==false || snapLen<65535)
	{
		// get netmask; if the lookup fails fall back to 0 rather than
		// passing an uninitialized mask to pcap_compile
		bpf_u_int32 pcapNetaddr = 0, pcapMask = 0;
		if (pcap_lookupnet(name.c_str(), &pcapNetaddr, &pcapMask, errbuf) < 0)
		{
			pcapNetaddr = 0;
			pcapMask = 0;
			std::cerr << "Warning: pcap_lookupnet failed because <"
                       <<errbuf<<">, using netmask 0" << std::endl;
		}
				
		struct bpf_program pcapFilterProgram;		
		std::vector<char> filterDup;
		duplicateFilterString(pcapFilter, filterDup);
				
		if (pcap_compile(pcapSession, &pcapFilterProgram, 
                    &filterDup[0], 1, pcapMask) == -1) 
		{
			// Copy the message before pcap_close(); report it rather than
			// errbuf, which pcap_compile does not fill in.
			std::string error = pcap_geterr(pcapSession);
			pcap_close(pcapSession);
			std::cerr << "Failed pcap_compile because <"
                       <<error<<">" << std::endl;
			return NULL;
		}	
				
		if (pcap_setfilter(pcapSession, &pcapFilterProgram) == -1)
		{
			// Same as above: the reason comes from pcap_geterr, not errbuf.
			std::string error = pcap_geterr(pcapSession);
			pcap_freecode(&pcapFilterProgram);
			pcap_close(pcapSession);
			std::cerr << "Failed pcap_setfilter because <"
                       <<error<<">" << std::endl;
			return NULL;
		}
				
		pcap_freecode(&pcapFilterProgram);
	}
			
	// set session in non blocking mode
	if (pcap_setnonblock(pcapSession, 1, errbuf)!=0)
	{
		pcap_close(pcapSession);
		std::cerr << "Failed pcap_setnonblock because <"
               <<errbuf<<">" << std::endl;
		return NULL;
	}

	/*
		Enable this for immediate delivery of packets through callback.
		
	uint32_t v = 1;
	if (ioctl(pcap_fileno(pcapSession), BIOCIMMEDIATE, &v) < 0) {
		pcap_close(pcapSession);
		std::cerr << "Failed ioctl BIOCIMMEDIATE" << std::endl;
		return NULL;
	}
	*/
	
	// Report the link-layer type; fall back to the numeric value when
	// libpcap has no name for it.
	int dlt;
	const char *dlt_name;
	dlt = pcap_datalink(pcapSession);
	dlt_name = pcap_datalink_val_to_name(dlt);
	if (dlt_name == NULL) {
		(void)fprintf(stderr, 
"listening on %s, link-type %u, capture size %u bytes\n",
					  name.c_str(), dlt, snapLen);
	} else {
		(void)fprintf(stderr, 
"listening on %s, link-type %s (%s), capture size %u bytes\n",
					  name.c_str(), dlt_name,
					  pcap_datalink_val_to_description(dlt), snapLen);
	}
			
	return pcapSession;
}
		
/**
Polls the capture session forever.

Each iteration dispatches up to 100 packets to test_pcap_handler, prints the
pcap_dispatch result and the session statistics (received, dropped, dropped
by the interface), then sleeps for 5 seconds.

Returns false as soon as pcap_stats or pcap_dispatch reports a failure;
the loop has no other exit.
**/
bool capture(pcap_t * pcapSession)
{
	struct pcap_stat pcapStats;
		
	while (true)
	{
		int32_t ret = pcap_dispatch(pcapSession, 100, 
								test_pcap_handler, (u_char*)NULL);
		std::cout << "Read " << ret << " packets" << std::endl;
		if (pcap_stats(pcapSession, &pcapStats) != 0)
		{
				// Name the call that actually failed (was mislabelled as
				// pcap_setnonblock).
				std::string error = pcap_geterr(pcapSession);
				std::cerr << "Failed pcap_stats because <"
                               <<error<<">" << std::endl;
				return false;
		}
		std::cout << "ReceivedPackets " << pcapStats.ps_recv << 
				" DroppedPackets " << pcapStats.ps_drop << 
				" I/F DroppedPackets " << pcapStats.ps_ifdrop << std::endl;
				
				
		if (ret==-1)
		{
			std::string error = pcap_geterr(pcapSession);
			std::cerr << "Failed pcap_dispatch because <"<<error<<">" << std::endl;
			// bool function: report failure with false, not NULL.
			return false;
		}
				
				
		sleep(5);
	} 
	// Not reached: the loop above only exits through the failure returns.
	return true;
}

>Fix:


Patch attached with submission follows:

Index: bpf.c
===================================================================
--- bpf.c	(revision 68984)
+++ bpf.c	(working copy)
@@ -721,9 +721,12 @@
 	 * have arrived to fill the store buffer.
 	 */
 	while (d->bd_hbuf == 0) {
-		if (d->bd_immediate && d->bd_slen != 0) {
+		if ((d->bd_immediate || (ioflag & IO_NDELAY))
+		    && d->bd_slen != 0) {
 			/*
-			 * A packet(s) either arrived since the previous
+			 * We're in immediate mode, or are reading
+			 * in non-blocking mode, and a packet(s)
+			 * either arrived since the previous
 			 * read or arrived while we were asleep.
 			 * Rotate the buffers and return what's here.
 			 */


>Release-Note:
>Audit-Trail:
Responsible-Changed-From-To: freebsd-bugs->freebsd-net 
Responsible-Changed-By: linimon 
Responsible-Changed-When: Sat Feb 13 05:07:35 UTC 2010 
Responsible-Changed-Why:  
Over to maintainer(s). 

http://www.freebsd.org/cgi/query-pr.cgi?pr=143855 
Responsible-Changed-From-To: freebsd-net->jkim 
Responsible-Changed-By: jkim 
Responsible-Changed-When: Fri Feb 19 22:29:28 UTC 2010 
Responsible-Changed-Why:  
I'll take it. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=143855 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/143855: commit references a PR
Date: Sat, 20 Feb 2010 00:19:34 +0000 (UTC)

 Author: jkim
 Date: Sat Feb 20 00:19:21 2010
 New Revision: 204105
 URL: http://svn.freebsd.org/changeset/base/204105
 
 Log:
   Return partially filled buffer for non-blocking read(2)
   in non-immediate mode.
   
   PR:		kern/143855
 
 Modified:
   head/sys/net/bpf.c
 
 Modified: head/sys/net/bpf.c
 ==============================================================================
 --- head/sys/net/bpf.c	Sat Feb 20 00:16:44 2010	(r204104)
 +++ head/sys/net/bpf.c	Sat Feb 20 00:19:21 2010	(r204105)
 @@ -664,8 +664,9 @@ static	int
  bpfread(struct cdev *dev, struct uio *uio, int ioflag)
  {
  	struct bpf_d *d;
 -	int timed_out;
  	int error;
 +	int non_block;
 +	int timed_out;
  
  	error = devfs_get_cdevpriv((void **)&d);
  	if (error != 0)
 @@ -678,6 +679,8 @@ bpfread(struct cdev *dev, struct uio *ui
  	if (uio->uio_resid != d->bd_bufsize)
  		return (EINVAL);
  
 +	non_block = ((ioflag & O_NONBLOCK) != 0);
 +
  	BPFD_LOCK(d);
  	d->bd_pid = curthread->td_proc->p_pid;
  	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
 @@ -694,14 +697,20 @@ bpfread(struct cdev *dev, struct uio *ui
  	 * have arrived to fill the store buffer.
  	 */
  	while (d->bd_hbuf == NULL) {
 -		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
 +		if (d->bd_slen != 0) {
  			/*
  			 * A packet(s) either arrived since the previous
  			 * read or arrived while we were asleep.
 -			 * Rotate the buffers and return what's here.
  			 */
 -			ROTATE_BUFFERS(d);
 -			break;
 +			if (d->bd_immediate || non_block || timed_out) {
 +				/*
 +				 * Rotate the buffers and return what's here
 +				 * if we are in immediate mode, non-blocking
 +				 * flag is set, or this descriptor timed out.
 +				 */
 +				ROTATE_BUFFERS(d);
 +				break;
 +			}
  		}
  
  		/*
 @@ -715,7 +724,7 @@ bpfread(struct cdev *dev, struct uio *ui
  			return (ENXIO);
  		}
  
 -		if (ioflag & O_NONBLOCK) {
 +		if (non_block) {
  			BPFD_UNLOCK(d);
  			return (EWOULDBLOCK);
  		}
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
State-Changed-From-To: open->closed 
State-Changed-By: jkim 
State-Changed-When: Sat Feb 20 03:21:10 UTC 2010 
State-Changed-Why:  
Committed (a different version for HEAD).  Thanks! 
I will MFC it to stable/8 next week but I think we will miss 7.3-RELEASE. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=143855 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/143855: commit references a PR
Date: Fri, 26 Feb 2010 00:11:32 +0000 (UTC)

 Author: jkim
 Date: Fri Feb 26 00:11:17 2010
 New Revision: 204341
 URL: http://svn.freebsd.org/changeset/base/204341
 
 Log:
   MFC:	r204105
   
   Return partially filled buffer for non-blocking read(2)
   in non-immediate mode.
   
   PR:		kern/143855
   Submitted by:	Guy Harris (guy at alum dot mit dot edu)
 
 Modified:
   stable/8/sys/net/bpf.c
 Directory Properties:
   stable/8/sys/   (props changed)
   stable/8/sys/amd64/include/xen/   (props changed)
   stable/8/sys/cddl/contrib/opensolaris/   (props changed)
   stable/8/sys/contrib/dev/acpica/   (props changed)
   stable/8/sys/contrib/pf/   (props changed)
   stable/8/sys/dev/xen/xenpci/   (props changed)
   stable/8/sys/netinet/   (props changed)
 
 Modified: stable/8/sys/net/bpf.c
 ==============================================================================
 --- stable/8/sys/net/bpf.c	Thu Feb 25 22:44:23 2010	(r204340)
 +++ stable/8/sys/net/bpf.c	Fri Feb 26 00:11:17 2010	(r204341)
 @@ -661,8 +661,9 @@ static	int
  bpfread(struct cdev *dev, struct uio *uio, int ioflag)
  {
  	struct bpf_d *d;
 -	int timed_out;
  	int error;
 +	int non_block;
 +	int timed_out;
  
  	error = devfs_get_cdevpriv((void **)&d);
  	if (error != 0)
 @@ -675,6 +676,8 @@ bpfread(struct cdev *dev, struct uio *ui
  	if (uio->uio_resid != d->bd_bufsize)
  		return (EINVAL);
  
 +	non_block = ((ioflag & O_NONBLOCK) != 0);
 +
  	BPFD_LOCK(d);
  	d->bd_pid = curthread->td_proc->p_pid;
  	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
 @@ -691,14 +694,20 @@ bpfread(struct cdev *dev, struct uio *ui
  	 * have arrived to fill the store buffer.
  	 */
  	while (d->bd_hbuf == NULL) {
 -		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
 +		if (d->bd_slen != 0) {
  			/*
  			 * A packet(s) either arrived since the previous
  			 * read or arrived while we were asleep.
 -			 * Rotate the buffers and return what's here.
  			 */
 -			ROTATE_BUFFERS(d);
 -			break;
 +			if (d->bd_immediate || non_block || timed_out) {
 +				/*
 +				 * Rotate the buffers and return what's here
 +				 * if we are in immediate mode, non-blocking
 +				 * flag is set, or this descriptor timed out.
 +				 */
 +				ROTATE_BUFFERS(d);
 +				break;
 +			}
  		}
  
  		/*
 @@ -712,7 +721,7 @@ bpfread(struct cdev *dev, struct uio *ui
  			return (ENXIO);
  		}
  
 -		if (ioflag & O_NONBLOCK) {
 +		if (non_block) {
  			BPFD_UNLOCK(d);
  			return (EWOULDBLOCK);
  		}
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/143855: commit references a PR
Date: Fri, 26 Feb 2010 00:12:56 +0000 (UTC)

 Author: jkim
 Date: Fri Feb 26 00:12:48 2010
 New Revision: 204342
 URL: http://svn.freebsd.org/changeset/base/204342
 
 Log:
   MFC:	r204105
   
   Return partially filled buffer for non-blocking read(2)
   in non-immediate mode.
   
   PR:		kern/143855
   Submitted by:	Guy Harris (guy at alum dot mit dot edu)
 
 Modified:
   stable/7/sys/net/bpf.c
 Directory Properties:
   stable/7/sys/   (props changed)
   stable/7/sys/cddl/contrib/opensolaris/   (props changed)
   stable/7/sys/contrib/dev/acpica/   (props changed)
   stable/7/sys/contrib/pf/   (props changed)
 
 Modified: stable/7/sys/net/bpf.c
 ==============================================================================
 --- stable/7/sys/net/bpf.c	Fri Feb 26 00:11:17 2010	(r204341)
 +++ stable/7/sys/net/bpf.c	Fri Feb 26 00:12:48 2010	(r204342)
 @@ -481,8 +481,9 @@ static	int
  bpfread(struct cdev *dev, struct uio *uio, int ioflag)
  {
  	struct bpf_d *d = dev->si_drv1;
 -	int timed_out;
  	int error;
 +	int non_block;
 +	int timed_out;
  
  	/*
  	 * Restrict application to use a buffer the same size as
 @@ -491,6 +492,8 @@ bpfread(struct cdev *dev, struct uio *ui
  	if (uio->uio_resid != d->bd_bufsize)
  		return (EINVAL);
  
 +	non_block = ((ioflag & O_NONBLOCK) != 0);
 +
  	BPFD_LOCK(d);
  	d->bd_pid = curthread->td_proc->p_pid;
  	if (d->bd_state == BPF_WAITING)
 @@ -503,14 +506,20 @@ bpfread(struct cdev *dev, struct uio *ui
  	 * have arrived to fill the store buffer.
  	 */
  	while (d->bd_hbuf == NULL) {
 -		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
 +		if (d->bd_slen != 0) {
  			/*
  			 * A packet(s) either arrived since the previous
  			 * read or arrived while we were asleep.
 -			 * Rotate the buffers and return what's here.
  			 */
 -			ROTATE_BUFFERS(d);
 -			break;
 +			if (d->bd_immediate || non_block || timed_out) {
 +				/*
 +				 * Rotate the buffers and return what's here
 +				 * if we are in immediate mode, non-blocking
 +				 * flag is set, or this descriptor timed out.
 +				 */
 +				ROTATE_BUFFERS(d);
 +				break;
 +			}
  		}
  
  		/*
 @@ -524,7 +533,7 @@ bpfread(struct cdev *dev, struct uio *ui
  			return (ENXIO);
  		}
  
 -		if (ioflag & O_NONBLOCK) {
 +		if (non_block) {
  			BPFD_UNLOCK(d);
  			return (EWOULDBLOCK);
  		}
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/143855: commit references a PR
Date: Mon, 22 Mar 2010 20:27:10 +0000 (UTC)

 Author: jkim
 Date: Mon Mar 22 20:26:52 2010
 New Revision: 205467
 URL: http://svn.freebsd.org/changeset/base/205467
 
 Log:
   MFC:	r204105
   
   Return partially filled buffer for non-blocking read(2)
   in non-immediate mode.
   
   PR:		kern/143855
   Submitted by:	Guy Harris (guy at alum dot mit dot edu)
 
 Modified:
   stable/6/sys/net/bpf.c
 Directory Properties:
   stable/6/sys/   (props changed)
   stable/6/sys/contrib/pf/   (props changed)
   stable/6/sys/dev/cxgb/   (props changed)
 
 Modified: stable/6/sys/net/bpf.c
 ==============================================================================
 --- stable/6/sys/net/bpf.c	Mon Mar 22 20:24:00 2010	(r205466)
 +++ stable/6/sys/net/bpf.c	Mon Mar 22 20:26:52 2010	(r205467)
 @@ -456,8 +456,9 @@ static	int
  bpfread(struct cdev *dev, struct uio *uio, int ioflag)
  {
  	struct bpf_d *d = dev->si_drv1;
 -	int timed_out;
  	int error;
 +	int non_block;
 +	int timed_out;
  
  	/*
  	 * Restrict application to use a buffer the same size as
 @@ -466,6 +467,8 @@ bpfread(struct cdev *dev, struct uio *ui
  	if (uio->uio_resid != d->bd_bufsize)
  		return (EINVAL);
  
 +	non_block = ((ioflag & O_NONBLOCK) != 0);
 +
  	BPFD_LOCK(d);
  	d->bd_pid = curthread->td_proc->p_pid;
  	if (d->bd_state == BPF_WAITING)
 @@ -478,14 +481,20 @@ bpfread(struct cdev *dev, struct uio *ui
  	 * have arrived to fill the store buffer.
  	 */
  	while (d->bd_hbuf == NULL) {
 -		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
 +		if (d->bd_slen != 0) {
  			/*
  			 * A packet(s) either arrived since the previous
  			 * read or arrived while we were asleep.
 -			 * Rotate the buffers and return what's here.
  			 */
 -			ROTATE_BUFFERS(d);
 -			break;
 +			if (d->bd_immediate || non_block || timed_out) {
 +				/*
 +				 * Rotate the buffers and return what's here
 +				 * if we are in immediate mode, non-blocking
 +				 * flag is set, or this descriptor timed out.
 +				 */
 +				ROTATE_BUFFERS(d);
 +				break;
 +			}
  		}
  
  		/*
 @@ -499,7 +508,7 @@ bpfread(struct cdev *dev, struct uio *ui
  			return (ENXIO);
  		}
  
 -		if (ioflag & O_NONBLOCK) {
 +		if (non_block) {
  			BPFD_UNLOCK(d);
  			return (EWOULDBLOCK);
  		}
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
>Unformatted:
