From fanf@shirt.www.demon.net Wed Jun  2 02:29:43 1999
Return-Path: <fanf@shirt.www.demon.net>
Received: from shirt.www.demon.net (shirt.www.demon.net [212.240.58.2])
	by hub.freebsd.org (Postfix) with ESMTP id A616114CC7
	for <FreeBSD-gnats-submit@freebsd.org>; Wed,  2 Jun 1999 02:29:41 -0700 (PDT)
	(envelope-from fanf@shirt.www.demon.net)
Received: (from fanf@localhost)
	by shirt.www.demon.net (8.9.2/8.9.2) id XAA02365;
	Sun, 11 Apr 1999 23:06:40 +0100 (BST)
	(envelope-from fanf)
Message-Id: <199904112206.XAA02365@shirt.www.demon.net>
Date: Sun, 11 Apr 1999 23:06:40 +0100 (BST)
From: Tony Finch <dot@dotat.at>
Sender: fanf@shirt.www.demon.net
Reply-To: Tony Finch <dot@dotat.at>
To: FreeBSD-gnats-submit@freebsd.org
Subject: recvmsg with a cmsghdr but no iovec is broken
X-Send-Pr-Version: 3.2

>Number:         11988
>Category:       kern
>Synopsis:       recvmsg with a cmsghdr but no iovec is broken
>Confidential:   no
>Severity:       critical
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Wed Jun  2 02:30:00 PDT 1999
>Closed-Date:    Sun Oct 28 11:00:48 PST 2001
>Last-Modified:  Sun Oct 28 11:18:05 PST 2001
>Originator:     Tony Finch
>Release:        FreeBSD 3.1-STABLE i386
>Organization:
Demon Internet Ltd
>Environment:

FreeBSD shirt.www.demon.net 3.1-STABLE FreeBSD 3.1-STABLE #15: Thu Apr  1 17:31:19 BST 1999     root@shirt.www.demon.net:/usr/src/sys/compile/SHIRT  i386

>Description:

I have a program (included below) which opens a TCP listen socket and forks
a child with which it communicates over a unix domain socketpair. When a TCP
connection is received it passes the new fd to the child which deals with
the connection while the parent returns to accept(). The fd is passed using
sendmsg() with an empty msg_iov.

If the child process does not provide a pointer to an iovec in the msghdr,
or if the sum of the iov_len fields is zero, then the child only receives
one file descriptor over the socket and subsequent recvmsg()s do not fill
in the msg_control block. Furthermore, if the program is killed with SIGINT
between the first (successful) connection and the second connection (which
would cause the child to exit owing to EBADF) the machine reboots. I don't
have a panic message to show because my test machine is a couple of miles
away and doesn't like the serial console server.

The program does work if the receive iovec has more than zero space.

>How-To-Repeat:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/uio.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <time.h>

extern const char *__progname;

/*
 * Write one diagnostic line to stderr, prefixed with the program name and
 * the pid.  `message' is a printf-style format whose arguments are taken
 * from `ap'.  When the format ends in ':' the strerror() text and numeric
 * value of the errno that was current on entry are appended.
 */
static void vwarn(const char *message, va_list ap) {
    int err = errno;	/* captured first, before any stdio call below */
    size_t len = strlen(message);

    fprintf(stderr, "%s (%d): ", __progname, (int)getpid());
    vfprintf(stderr, message, ap);
    /* An empty format has no last character: never index message[-1]. */
    if(len > 0 && message[len - 1] == ':')
	fprintf(stderr, " %s (%d)\n", strerror(err), err);
    else
	fprintf(stderr, "\n");
}

/*
 * Shared body of the variadic reporters below: forwards the enclosing
 * function's `message' and its trailing arguments to vwarn().  Only usable
 * inside a variadic function whose last named parameter is `message'.
 */
#define DO_VWARN \
	do { \
	    va_list args; \
	    va_start(args, message); \
	    vwarn(message, args); \
	    va_end(args); \
	} while(0)

/* Report a printf-style diagnostic through vwarn() and return. */
static void warn(const char *message, ...) {
    DO_VWARN;
}

/* Report a printf-style diagnostic through vwarn(), then exit with status 1. */
static void die(const char *message, ...) {
    DO_VWARN;
    exit(1);
}

/*
 * Dump the fields of *msg through warn(), one per line: the header itself,
 * the first iovec (if msg_iov is set) together with the byte iov_base points
 * at, and the first control message header plus the int stored in its data
 * area.  The control message part is skipped when msg_control is NULL.
 *
 * Unsigned/size fields are widened to unsigned long and pointers are cast to
 * void * so that each argument matches its conversion specifier regardless
 * of how the platform declares the msghdr/iovec/cmsghdr members.
 */
static void debug_msg(struct msghdr *msg) {
    struct cmsghdr *cm = (struct cmsghdr *)msg->msg_control;

    warn("msg = %p", (void *)msg);
    warn("msg->msg_name = %p", (void *)msg->msg_name);
    warn("msg->msg_namelen = %lu", (unsigned long)msg->msg_namelen);
    warn("msg->msg_iov = %p", (void *)msg->msg_iov);
    warn("msg->msg_iovlen = %lu", (unsigned long)msg->msg_iovlen);
    if(msg->msg_iov) {
	warn("msg->msg_iov->iov_base = %p", (void *)msg->msg_iov->iov_base);
	warn("msg->msg_iov->iov_len = %lu", (unsigned long)msg->msg_iov->iov_len);
	/* iov_base may be declared void *; read the first byte as a char. */
	if(msg->msg_iov->iov_base)
	    warn("*msg->msg_iov->iov_base = %d", *(char *)msg->msg_iov->iov_base);
    }
    warn("msg->msg_control = %p", (void *)msg->msg_control);
    warn("msg->msg_controllen = %lu", (unsigned long)msg->msg_controllen);
    warn("msg->msg_flags = %d", msg->msg_flags);

    /* Nothing to dereference when the caller supplied no control buffer. */
    if(cm == NULL)
	return;

    /*
     * The buffer is dumped even if msg_controllen is 0 so that stale,
     * caller-preset contents remain visible in the trace.
     */
    warn("((struct cmsghdr *)msg->msg_control)->cmsg_level = %d", cm->cmsg_level);
    warn("((struct cmsghdr *)msg->msg_control)->cmsg_type = %d", cm->cmsg_type);
    warn("((struct cmsghdr *)msg->msg_control)->cmsg_len = %lu", (unsigned long)cm->cmsg_len);
    warn("*(int *)CMSG_DATA((struct cmsghdr *)msg->msg_control) = %d", *(int *)CMSG_DATA(cm));
}

/*
 * Receiving side of the reproducer.  Loops forever: waits with select()
 * until descriptor 3 is readable, calls recvmsg() on it, treats the int in
 * the control message data area as a file descriptor, writes "wibble\n" to
 * it, then writes a line containing this process's pid through a stdio
 * stream opened on that descriptor and closes the stream.
 *
 * Any failing call ends the process through die(); the function itself
 * never returns.
 */
static void child_process(void)
{
    int n, fd;
    FILE *fp;
    char byte;
    struct iovec iov;
    struct msghdr msg;
    /* Control buffer: one cmsghdr immediately followed by room for one int. */
    struct fdcmsg {
	struct cmsghdr cmsg;
	int fd;
    } cmsg;
    fd_set rfds;

    msg.msg_name = NULL;
    msg.msg_namelen = 0;
    /*
     * Four alternative receive-iovec setups; exactly one is compiled in.
     * As committed, the second (`#elif 1': one iovec of length 0 pointing
     * at `byte') is the active one.
     */
#if 0
    /* if this code is enabled the bug does not appear */
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    iov.iov_base = &byte;
    iov.iov_len = 1;
#elif 1
    /* this and the next two sections all exhibit the bug */
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    iov.iov_base = &byte;
    iov.iov_len = 0;
#elif 0
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    iov.iov_base = NULL;
    iov.iov_len = 0;
#else
    msg.msg_iov = NULL;
    msg.msg_iovlen = 0;
    iov.iov_base = NULL;
    iov.iov_len = 0;
#endif    
    /*
     * NOTE(review): msg_controllen and msg_flags are set once, here, and
     * not again inside the loop.  recvmsg() is documented to update them on
     * return, so later iterations start from whatever the previous call
     * left behind - confirm whether that is intended for the reproducer.
     */
    msg.msg_control = (void *)&cmsg;
    msg.msg_controllen = sizeof(cmsg);
    msg.msg_flags = 0;

    for(;;) {
	/* Block until descriptor 3 is readable. */
	FD_ZERO(&rfds);
	FD_SET(3, &rfds);
	if(select(4, &rfds, NULL, NULL, NULL) < 0)
	    die("select:");
	if(!FD_ISSET(3, &rfds))
	   exit(1);
	/*
	 * Preset the receive buffers to recognisable values (255, 0, 0, -1);
	 * presumably so the dump after recvmsg() shows which fields were
	 * actually overwritten.
	 */
	byte = 255;
	cmsg.cmsg.cmsg_level = 0;
	cmsg.cmsg.cmsg_type = 0;
	cmsg.cmsg.cmsg_len = sizeof(cmsg);
	*(int *)CMSG_DATA(&cmsg.cmsg) = -1;
	debug_msg(&msg);
	n = recvmsg(3, &msg, 0);
	if(n < 0)
	    die("recvmsg:");
	else
	    warn("recvmsg = %d", n);
	debug_msg(&msg);
	/*
	 * The descriptor is taken from the control buffer without checking
	 * msg_controllen, cmsg_level or cmsg_type.  If recvmsg() stored
	 * nothing there, fd is still the -1 preset above and the write()
	 * below fails, ending the child through die().
	 */
	fd = *(int *)CMSG_DATA(&cmsg.cmsg);
	if(write(fd, "wibble\n", 7) < 7)
	    die("write:");
	fp = fdopen(fd, "w");
	if(!fp)
	    die("fdopen:");
	fprintf(fp, "connection to pid %d\n", getpid());
	/* fclose() also closes the received descriptor underneath fp. */
	fclose(fp);
    }
}

/*
 * Create a unix-domain stream socketpair and fork.
 *
 * In the child, one end of the pair is installed as descriptor 3 (the
 * number child_process() reads from), the other end is closed, and
 * child_process() is run; the child never returns from this function.
 *
 * In the parent, the child's end is closed and the parent's end of the pair
 * is returned.  Failure of socketpair(), fork() or dup2() is fatal.
 */
static int make_child(void) {
    pid_t pid;
    int fd[2];

    if(socketpair(PF_UNIX, SOCK_STREAM, 0, fd) < 0)
	die("socketpair:");
    switch(pid = fork()) {
    case -1:
	die("fork:");
	/* NOTREACHED: die() exits */
    case 0:
	close(fd[0]);
	/*
	 * If the child's end already is descriptor 3 there is nothing to
	 * move, and closing fd[1] afterwards would destroy the channel.
	 */
	if(fd[1] != 3) {
	    if(dup2(fd[1], 3) < 0)
		die("dup2:");
	    close(fd[1]);
	}
	child_process();
	exit(0);
    default:
	warn("started pid %d", (int)pid);
	close(fd[1]);
	return(fd[0]);
    }
}

/*
 * Create a TCP (PF_INET, SOCK_STREAM) socket, enable SO_REUSEADDR on it,
 * bind it to INADDR_ANY on `port' and start listening with the given
 * `backlog'.  Every failure is fatal and reported through die().
 * Returns the listening descriptor.
 */
static int make_sock(int port, int backlog) {
    struct sockaddr_in addr;
    int reuse = 1;
    int lsock;

    if((lsock = socket(PF_INET, SOCK_STREAM, 0)) < 0)
	die("socket:");

    if(setsockopt(lsock, SOL_SOCKET, SO_REUSEADDR, (void *)&reuse, sizeof(reuse)) < 0)
	die("setsockopt SO_REUSEADDR:");

    /* Start from an all-zero address, then fill in the fields that matter. */
    memset(&addr, 0, sizeof(addr));
    addr.sin_addr.s_addr = INADDR_ANY;
    addr.sin_port = htons(port);
    addr.sin_family = AF_INET;

    if(bind(lsock, (struct sockaddr *)&addr, sizeof(addr)) < 0)
	die("bind:");

    if(listen(lsock, backlog) < 0)
	die("listen:");

    return(lsock);
}

/*
 * Sending side of the reproducer.  Makes stderr line buffered, opens a
 * listening socket with make_sock(5000, 10), starts the child with
 * make_child(), then loops forever: accept a connection, store its
 * descriptor in an SCM_RIGHTS control message, sendmsg() that message to
 * the child, and close the parent's copy of the connection.  Each step is
 * logged through warn(); argc and argv are not used.
 */
int main(int argc, char *argv[]) {
    /* Control buffer: one cmsghdr immediately followed by room for one int. */
    struct fdcmsg {
	struct cmsghdr cmsg;
	int fd;
    } ctl;
    struct msghdr msg;
    struct iovec iov;
    char byte;
    int listener, to_child, client, rc;

    setvbuf(stderr, NULL, _IOLBF, 0);

    /* The listener is created before the fork, as in the original report. */
    listener = make_sock(5000, 10);
    to_child = make_child();

    /* Fixed part of the control message; the descriptor is filled in per connection. */
    ctl.cmsg.cmsg_len = sizeof(ctl);
    ctl.cmsg.cmsg_level = SOL_SOCKET;
    ctl.cmsg.cmsg_type = SCM_RIGHTS;

    /* One-byte data vector; only referenced by the disabled alternative below. */
    byte = 0;
    iov.iov_base = &byte;
    iov.iov_len = 1;

    msg.msg_name = NULL;
    msg.msg_namelen = 0;
    /* sending with or without a data iovec makes no difference to the bug */
#if 0
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
#else
    msg.msg_iov = NULL;
    msg.msg_iovlen = 0;
#endif
    msg.msg_control = (void *)&ctl;
    msg.msg_controllen = sizeof(ctl);
    msg.msg_flags = 0;

    while(1) {
	client = accept(listener, NULL, 0);
	if(client < 0) {
	    warn("accept:");
	    continue;
	}
	warn("accept = %d", client);

	/* Hand the new connection to the child. */
	*(int *)CMSG_DATA(&ctl.cmsg) = client;
	debug_msg(&msg);
	rc = sendmsg(to_child, &msg, 0);
	if(rc >= 0)
	    warn("sendmsg = %d", rc);
	else
	    warn("sendmsg:");

	/* The parent's copy is closed whether or not the send succeeded. */
	rc = close(client);
	warn("close = %d", rc);
    }
}

>Fix:


>Release-Note:
>Audit-Trail:

From: Brian Somers <brian@Awfulhak.org>
To: Tony Finch <dot@dotat.at>
Cc: FreeBSD-gnats-submit@freebsd.org
Subject: Re: kern/11988: recvmsg with a cmsghdr but no iovec is broken 
Date: Wed, 02 Jun 1999 10:58:41 +0100

 > 
 > >Number:         11988
 > >Category:       kern
 > >Synopsis:       recvmsg with a cmsghdr but no iovec is broken
 [.....]
 
 This has been the case for some time - since before I implemented 
 multi-link ppp.  When I was writing it, I attempted to do exactly 
 what you're doing and then to write() all my link configuration data. 
 Things failed badly so I ended up merging everything into a single 
 sendmsg()/recvmsg().
 
 I'm afraid I have nothing fantastic to say (except to confirm that 
 the problem exists, and has done for some time).
 
 One other observation (a problem I suffered with for some time); you 
 *must* have some sort of handshake to confirm that the descriptor has 
 been received.  If you call close(conn) while the descriptor is still 
 in limbo (an mbuf) the descriptor will arrive at the other end ready 
 to give you EBADF next time you use it.
 
 A simple read(child, &ch, 1) after sendmsg() and before the close(conn) 
 and a write(3, "X", 1) after the recvmsg() will do the trick.
 -- 
 Brian <brian@Awfulhak.org>                        <brian@FreeBSD.org>
       <http://www.Awfulhak.org>                   <brian@OpenBSD.org>
 Don't _EVER_ lose your sense of humour !          <brian@uk.FreeBSD.org>
 
 
 

From: Tony Finch <dot@dotat.at>
To: Brian Somers <brian@Awfulhak.org>
Cc: Tony Finch <dot@dotat.at>, FreeBSD-gnats-submit@freebsd.org
Subject: Re: kern/11988: recvmsg with a cmsghdr but no iovec is broken 
Date: Fri, 4 Jun 1999 12:11:18 +0100 (BST)

 Brian Somers writes:
 > 
 > One other observation (a problem I suffered with for some time); you 
 > *must* have some sort of handshake to confirm that the descriptor has 
 > been received.  If you call close(conn) while the descriptor is still 
 > in limbo (an mbuf) the descriptor will arrive at the other end ready 
 > to give you EBADF next time you use it.
 > 
 > A simple read(child, &ch, 1) after sendmsg() and before the close(conn) 
 > and a write(3, "X", 1) after the recvmsg() will do the trick.
 
 AIUI that shouldn't be necessary: the f_count field should be
 incremented while the descriptor is in transit so close()ing it should
 not cause the resources to be released. If the receiver of the
 descriptor gets EBADF because the sender close()d it while it was in
 transit that is a bug (but a different one from the one that started
 this PR).
 
 Tony.
 

From: Brian Somers <brian@Awfulhak.org>
To: Tony Finch <dot@dotat.at>
Cc: Brian Somers <brian@Awfulhak.org>,
	FreeBSD-gnats-submit@freebsd.org
Subject: Re: kern/11988: recvmsg with a cmsghdr but no iovec is broken 
Date: Fri, 04 Jun 1999 13:12:16 +0100

 > Brian Somers writes:
 > > 
 > > One other observation (a problem I suffered with for some time); you 
 > > *must* have some sort of handshake to confirm that the descriptor has 
 > > been received.  If you call close(conn) while the descriptor is still 
 > > in limbo (an mbuf) the descriptor will arrive at the other end ready 
 > > to give you EBADF next time you use it.
 > > 
 > > A simple read(child, &ch, 1) after sendmsg() and before the close(conn) 
 > > and a write(3, "X", 1) after the recvmsg() will do the trick.
 > 
 > AIUI that shouldn't be necessary: the f_count field should be
 > incremented while the descriptor is in transit so close()ing it should
 > not cause the resources to be released. If the receiver of the
 > descriptor gets EBADF because the sender close()d it while it was in
 > transit that is a bug (but a different one from the one that started
 > this PR).
 
 I've had a poke around 'cos I recalled seeing someone else spot this. 
 The -current sendmsg man page mentions it in the BUGS section.
 
 > Tony.
 
 -- 
 Brian <brian@Awfulhak.org>                        <brian@FreeBSD.org>
       <http://www.Awfulhak.org>                   <brian@OpenBSD.org>
 Don't _EVER_ lose your sense of humour !          <brian@uk.FreeBSD.org>
 
 
 

From: Tony Finch <dot@dotat.at>
To: Brian Somers <brian@Awfulhak.org>
Cc: Tony Finch <dot@dotat.at>, FreeBSD-gnats-submit@freebsd.org
Subject: Re: kern/11988: recvmsg with a cmsghdr but no iovec is broken 
Date: Fri, 4 Jun 1999 13:23:29 +0100 (BST)

 Brian Somers writes:
 > > Brian Somers writes:
 > > > 
 > > > One other observation (a problem I suffered with for some time); you 
 > > > *must* have some sort of handshake to confirm that the descriptor has 
 > > > been received.  If you call close(conn) while the descriptor is still 
 > > > in limbo (an mbuf) the descriptor will arrive at the other end ready 
 > > > to give you EBADF next time you use it.
 > > > 
 > > > A simple read(child, &ch, 1) after sendmsg() and before the close(conn) 
 > > > and a write(3, "X", 1) after the recvmsg() will do the trick.
 > > 
 > > AIUI that shouldn't be necessary: the f_count field should be
 > > incremented while the descriptor is in transit so close()ing it should
 > > not cause the resources to be released. If the receiver of the
 > > descriptor gets EBADF because the sender close()d it while it was in
 > > transit that is a bug (but a different one from the one that started
 > > this PR).
 > 
 > I've had a poke around 'cos I recalled seeing someone else spot this. 
 > The -current sendmsg man page mentions it in the BUGS section.
 
 -STABLE too. Do you have code to provoke it?
 
 Tony.
 

From: Pierre Beyssac <pb@fasterix.freenix.org>
To: Tony Finch <dot@dotat.at>, FreeBSD-gnats-submit@FreeBSD.ORG
Cc:  
Subject: Re: kern/11988: recvmsg with a cmsghdr but no iovec is broken
Date: Mon, 28 Jun 1999 00:22:03 +0200

 On Sun, Apr 11, 1999 at 11:06:40PM +0100, Tony Finch wrote:
 > would cause the child to exit owing to EBADF) the machine reboots. I don't
 > have a panic message to show because my test machine is a couple of miles
 > away and doesn't like the serial console server.
 
 The problem is easy to reproduce under -current; I've appended
 panic information below. The panic is in sbflush() which, after
 dropping all the data on the socket, finds sb->sb_mbcnt != 0.
 I'm still trying to understand that code so I can't say if I'll be
 able to fix it for the moment.
 
 Pierre
 
 panicstr: sbflush: cc 0 || mb 0xc05c3100 || mbcnt 128
 panic messages:
 ---
 panic: sbflush: cc 0 || mb 0xc05c3100 || mbcnt 128
 
 syncing disks... 14 14 1 done
 
 dumping to dev (4,9), offset 32
 dump 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 
 ---
 #0  0xc0142360 in boot ()
 (kgdb) where
 #0  0xc0142360 in boot ()
 #1  0xc014259c in at_shutdown ()
 #2  0xc015d515 in sbflush (sb=0xc4a62e04) at ../../kern/uipc_socket2.c:723
 #3  0xc015cffd in sbrelease (sb=0xc4a62e04) at ../../kern/uipc_socket2.c:411
 #4  0xc015c430 in sorflush (so=0xc4423840) at ../../kern/uipc_socket.c:929
 #5  0xc015b1f4 in sofree (so=0xc4423840) at ../../kern/uipc_socket.c:220
 #6  0xc015b332 in soclose (so=0xc4423840) at ../../kern/uipc_socket.c:284
 #7  0xc01515ff in soo_close (fp=0xc0987600, p=0xc44e3100)
     at ../../kern/sys_socket.c:175
 #8  0xc013afac in closef ()
 #9  0xc013ad84 in fdfree ()
 #10 0xc013c238 in exit1 ()
 #11 0xc0143946 in sigexit ()
 #12 0xc0143789 in postsig ()
 #13 0xc0204dca in syscall ()
 #14 0xc01f9d10 in Xint0x80_syscall ()
 #15 0x8048d70 in ?? ()
 #16 0x8048eae in ?? ()
 #17 0x80488a5 in ?? ()
 (kgdb) select 3
 (kgdb) print *sb
 $1 = {sb_cc = 0, sb_hiwat = 8192, sb_mbcnt = 128, sb_mbmax = 65536, 
   sb_lowat = 1, sb_mb = 0xc05c3100, sb_sel = {si_pid = 0, si_flags = 0}, 
   sb_flags = 64, sb_timeo = 0}
 (kgdb) print *sb->sb_mb
 $2 = {m_hdr = {mh_next = 0x0, mh_nextpkt = 0x0, mh_data = 0xc05c3120 "", 
     mh_len = 0, mh_type = 1, mh_flags = 2}, M_dat = {MH = {MH_pkthdr = {
         rcvif = 0x0, len = 0, header = 0x0}, MH_dat = {MH_ext = {
           ext_buf = 0x0, ext_free = 0x200, ext_size = 13288073, ext_ref = 0}, 
         MH_databuf = "\000\000\000\000\000\002\000\000\211", '\000' <repeats 12 times>, "\002\000\002\000\000\211", '\000' <repeats 12 times>, "\002\000\002\000\000\211$", '\000' <repeats 12 times>, "\002\000\002\000\000\211\200", '\000' <repeats 12 times>, "\002\000\002\000\000\211,\000\000\000\000"}}, 
     M_databuf = '\000' <repeats 17 times>, "\002\000\000\211", '\000' <repeats 12 times>, "\002\000\002\000\000\211", '\000' <repeats 12 times>, "\002\000\002\000\000\211$", '\000' <repeats 12 times>, "\002\000\002\000\000\211\200", '\000' <repeats 12 times>, "\002\000\002\000\000\211,\000\000\000\000"}}
 
 

From: Pierre Beyssac <beyssac@enst.fr>
To: FreeBSD-gnats-submit@freebsd.org
Cc: dot@dotat.at, brian@Awfulhak.org
Subject: Re: kern/11988: recvmsg with a cmsghdr but no iovec is broken
Date: Tue, 28 Sep 1999 11:04:16 +0200

 Hello,
 
 Here's a kernel patch that fixes the panic under -current. Could
 you try it on your system and tell me if it works for you?
 
 I'm still investigating the rest of the PR regarding how descriptor
 passing should work.
 
 Pierre
 
 Index: uipc_socket2.c
 ===================================================================
 RCS file: /usr/cvs/src/sys/kern/uipc_socket2.c,v
 retrieving revision 1.50
 diff -u -r1.50 uipc_socket2.c
 --- uipc_socket2.c	1999/09/19 02:16:19	1.50
 +++ uipc_socket2.c	1999/09/28 08:23:53
 @@ -730,8 +730,15 @@
  
  	if (sb->sb_flags & SB_LOCK)
  		panic("sbflush: locked");
 -	while (sb->sb_mbcnt && sb->sb_cc)
 +	while (sb->sb_mbcnt) {
 +		/*
 +		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
 +		 * we would loop forever. Panic instead.
 +		 */
 +		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
 +			break;
  		sbdrop(sb, (int)sb->sb_cc);
 +	}
  	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
  		panic("sbflush: cc %ld || mb %p || mbcnt %ld", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
  }
 -- 
 Pierre Beyssac		pb@enst.fr
 

From: Brian Somers <brian@Awfulhak.org>
To: Pierre Beyssac <beyssac@enst.fr>
Cc: FreeBSD-gnats-submit@FreeBSD.ORG, dot@dotat.at,
	brian@Awfulhak.org
Subject: Re: kern/11988: recvmsg with a cmsghdr but no iovec is broken 
Date: Fri, 01 Oct 1999 11:18:41 +0100

 > Hello,
 > 
 > Here's a kernel patch that fixes the panic under -current. Could
 > you try it on your system and tell me if it works for you?
 > 
 > I'm still investigating the rest of the PR regarding how descriptor
 > passing should work.
 > 
 > Pierre
 
 Hi,
 
 I'm afraid I don't have any code to test this... I saw the problem 
 while developing the ppp MP, but decided that I needed to send data 
 at the same time anyway...   I believe Tony is the person with the 
 code.
 
 > -- 
 > Pierre Beyssac		pb@enst.fr
 
 Cheers.
 
 -- 
 Brian <brian@Awfulhak.org>                        <brian@FreeBSD.org>
       <http://www.Awfulhak.org>                   <brian@OpenBSD.org>
 Don't _EVER_ lose your sense of humour !          <brian@FreeBSD.org.uk>
 
 
 
State-Changed-From-To: open->closed 
State-Changed-By: asmodai 
State-Changed-When: Sun Oct 28 11:00:48 PST 2001 
State-Changed-Why:  
This code is present in STABLE and CURRENT. 

http://www.FreeBSD.org/cgi/query-pr.cgi?pr=11988 
>Unformatted:
