From rfg@monkeys.com  Wed Feb 27 19:39:46 2002
Return-Path: <rfg@monkeys.com>
Received: from segfault.monkeys.com (246.dsl6660157.rstatic.surewest.net [66.60.157.246])
	by hub.freebsd.org (Postfix) with ESMTP id 85FAE37B400
	for <FreeBSD-gnats-submit@freebsd.org>; Wed, 27 Feb 2002 19:39:44 -0800 (PST)
Received: by segfault.monkeys.com (Postfix, from userid 1237)
	id A169F660B; Wed, 27 Feb 2002 19:39:38 -0800 (PST)
Message-Id: <20020228033938.A169F660B@segfault.monkeys.com>
Date: Wed, 27 Feb 2002 19:39:38 -0800 (PST)
From: rfg@monkeys.com
Reply-To: rfg@monkeys.com
To: FreeBSD-gnats-submit@freebsd.org
Cc:
Subject: poll(2) doesn't set POLLERR for failed connect(2) attempts
X-Send-Pr-Version: 3.113
X-GNATS-Notify:

>Number:         35396
>Category:       kern
>Synopsis:       poll(2) doesn't set POLLERR for failed connect(2) attempts
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Wed Feb 27 19:40:01 PST 2002
>Closed-Date:    
>Last-Modified:  Thu Feb 28 12:00:12 PST 2002
>Originator:     Ronald F. Guilmette
>Release:        FreeBSD 4.3-RELEASE i386
>Organization:
Infinite Monkeys & Co.
>Environment:

FreeBSD 4.3-RELEASE #0

>Description:

If you create a socket and then use fcntl(2) to set O_NONBLOCK for it
(thus making it non-blocking) and then you start up an asynchronous
connect(2) on the socket to some dead host/port, and then use poll(2)
to try to check status on the state of the socket, eventually, the
connect attempt will actually fail (after a suitable number of failed
retries) and the call to poll(2) will return, but in the specific
`struct pollfd' structure corresponding to the socket's fd, the POLLERR
bit will _not_ be set in the `revents' field.

That (POLLERR) bit _should_ be set when such errors occur however.

The behavior of poll(2) under FreeBSD 4.3 is clearly not correct.
The connect attempt _has_ failed with an error.  But poll(2) is failing
to properly set the corresponding POLLERR bit to note that fact.

(See my posting today to freebsd-questions for a bit more background
on this problem.)

>How-To-Repeat:

The short example program below illustrates the problem.  It attempts to
connect to a dead/filtered port on one of my servers.  This program should
(after a short delay) print "poll(2) indicates connect error", but instead
it prints "getsockopt(2) indicates connect error", thus indicating that
the connection error has _not_ been flagged via a POLLERR bit being set.

cut here
=======================================================================
/* poll(2) error test #1 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>

static struct protoent *tcp_proto;

/* Write the message described by FMT (with ARG as its single optional
   string argument) to stderr, finish the line, and exit with status 1.
   This function never returns.  */
static void
fatal (register char const *const fmt, register char const *const arg)
{
  FILE *const out = stderr;

  fprintf (out, fmt, arg);
  fputc ('\n', out);
  exit (1);
}

/* Wait for the non-blocking connect(2) in progress on FD to finish, then
   report how the outcome was signalled.  Every path ends in fatal(), so
   this function never returns.

   The POLLERR test deliberately comes first: the program exists to show
   whether poll(2) itself flags a failed connect.  Only when it does not
   is SO_ERROR consulted to fetch the pending socket error.  */
static void
poll_for_completion (register int const fd)
{
  auto struct pollfd pfd;
  auto int err = 0;
  /* The option length is a value-result argument: on entry it must hold
     the size of the buffer that getsockopt(2) may fill in.  */
  auto socklen_t err_size = sizeof err;

  pfd.fd = fd;
  pfd.events = POLLOUT;
  pfd.revents = 0;

  /* Block indefinitely (timeout -1) until the socket becomes writable or
     an exceptional condition is reported.  */
  if (poll (&pfd, 1, -1) == -1)
    fatal ("Error in poll: %s", strerror (errno));

  if (pfd.revents & POLLERR)
    fatal ("poll(2) indicates connect error", NULL);

  if (getsockopt (fd, SOL_SOCKET, SO_ERROR, &err, &err_size) == -1)
    fatal ("Error in getsockopt: %s", strerror (errno));

  if (err != 0)
    fatal ("getsockopt(2) indicates connect error: %s", strerror (err));

  /* fatal() is reused here simply to print the result and exit.  */
  fatal ("Connect successful", NULL);
}

/* Create a non-blocking TCP socket and begin connecting it to ADDR:PORT.
   If the connect completes or fails on the spot, say so and close the
   socket; if it is still in progress (EINPROGRESS), hand the descriptor
   to poll_for_completion() to wait for the outcome.  */
static void
start_connecting (struct in_addr addr, unsigned short port)
{
  struct sockaddr_in peer;
  int sock;

  sock = socket (PF_INET, SOCK_STREAM, tcp_proto->p_proto);
  if (sock == -1)
    fatal ("Error creating socket: %s", strerror (errno));

  if (fcntl (sock, F_SETFL, O_NONBLOCK) == -1)
    fatal ("Error setting O_NONBLOCK for socket: %s", strerror (errno));

  memset (&peer, 0, sizeof peer);
  peer.sin_family = AF_INET;
  peer.sin_port = htons (port);
  peer.sin_addr = addr;

  if (connect (sock, (struct sockaddr *) &peer, sizeof peer) != -1)
    {
      printf ("Connection completed immediately\n");
      close (sock);
      return;
    }

  /* connect(2) returned -1: either the attempt is still under way or it
     failed outright.  */
  if (errno == EINPROGRESS)
    {
      poll_for_completion (sock);
      return;
    }

  printf ("Connection failed immediately\n");
  close (sock);
}

/* Look up the TCP protocol number, then start a non-blocking connect to a
   fixed address and port and report how its completion is signalled.  */
int
main (void)
{
  static char const protocol_name[] = "tcp";
  struct in_addr target;

  tcp_proto = getprotobyname (protocol_name);
  if (tcp_proto == NULL)
    fatal ("Cannot find number for protocol: %s", protocol_name);

  inet_aton ("66.60.157.246", &target);
  start_connecting (target, 32767);

  return 0;
}
=======================================================================

>Fix:

Hack the kernel and make poll(2) properly check for connect(2) errors.  If
it finds any, have it set the POLLERR bit in the relevant `struct pollfd'
structure.
>Release-Note:
>Audit-Trail:

From: "Ronald F. Guilmette" <rfg@monkeys.com>
To: freebsd-gnats-submit@FreeBSD.org
Cc:  
Subject: Re: kern/35396: poll(2) doesn't set POLLERR for failed connect(2) attempts 
Date: Thu, 28 Feb 2002 11:56:15 -0800

 There was a small programming error in the example program that
 I originally submitted with this bug report.  The server process
 called listen(2) twice, rather than calling listen(2) and then
 accept(2), as it should have done.
 
 This programming error doesn't really affect the validity of the
 bug report, and the original example program still does illustrate
 (with its client process) the bug I was reporting adequately well,
 however, for the sake of complete clarity, and to ensure there are
 no misunderstandings, I am attaching a corrected example program
 for this bug report below.
 
 
 cut here for corrected example program
 ========================================================================
 /* poll(2) error test #2 - corrected */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <poll.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <netdb.h>
 
 static struct protoent *tcp_proto;
 
 /* Write the message described by FMT (with ARG as its single optional
    string argument) to stderr, finish the line, and exit with status 1.
    This function never returns.  */
 static void
 fatal (register char const *const fmt, register char const *const arg)
 {
   FILE *const out = stderr;
 
   fprintf (out, fmt, arg);
   fputc ('\n', out);
   exit (1);
 }
 
 /* Block in poll(2) on FD until something is reported, announce that the
    call returned, and exit through fatal() if a hangup or readable data
    was indicated (hangup is tested first).  Returns normally when neither
    bit is set.  */
 static void
 do_poll (register int const fd)
 {
   struct pollfd watch;
   int reported;
 
   watch.revents = 0;
   watch.events = POLLIN | POLLHUP | POLLERR;
   watch.fd = fd;
 
   /* A timeout of -1 waits without limit.  */
   if (poll (&watch, 1, -1) == -1)
     fatal ("Error in poll: %s", strerror (errno));
 
   fprintf (stderr, "poll(2) did return\n");
 
   reported = watch.revents;
 
   if (reported & POLLHUP)
     fatal ("poll(2) indicates hangup", NULL);
   else if (reported & POLLIN)
     fatal ("poll(2) indicates input waiting", NULL);
 }
 
 /* Client side of the test: open a blocking TCP connection to ADDR:PORT,
    announce success, and then poll the connected socket with do_poll().
    Socket or connect failures terminate the process through fatal().  */
 static void
 do_connect_and_poll (struct in_addr addr, unsigned short port)
 {
   struct sockaddr_in server;
   int sock;
 
   sock = socket (PF_INET, SOCK_STREAM, tcp_proto->p_proto);
   if (sock == -1)
     fatal ("Error creating socket: %s", strerror (errno));
 
   memset (&server, 0, sizeof server);
   server.sin_family = AF_INET;
   server.sin_port = htons (port);
   server.sin_addr = addr;
 
   if (connect (sock, (struct sockaddr *) &server, sizeof server) == -1)
     fatal ("Error in connect: %s", strerror (errno));
 
   fprintf (stderr, "Client connected to server\n");
 
   do_poll (sock);
 }
 
 /* Server side of the test: listen on ADDR:PORT, accept a single client,
    keep the connection open for one second, then close it so that the
    peer's poll(2) has an event to report.  Any failing system call
    terminates the process through fatal().  */
 static void
 do_listen_sleep_and_die (struct in_addr addr, unsigned short port)
 {
   auto int one = 1;
   auto struct sockaddr_in sin;
   auto struct sockaddr_in sin2;
   auto socklen_t sin2_size = sizeof sin2;
   register int listen_fd;
   register int client_fd;
 
   if ((listen_fd = socket (PF_INET, SOCK_STREAM, tcp_proto->p_proto)) == -1)
     fatal ("Error creating socket: %s", strerror (errno));
 
   /* SO_REUSEADDR changes how bind(2) validates the local address, so it
      must be set before the socket is bound; setting it afterwards cannot
      help a bind that has already been attempted.  */
   if (setsockopt (listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof one) == -1)
     fatal ("Error in setsockopt: %s", strerror (errno));
 
   memset (&sin, 0, sizeof sin);
   sin.sin_family = AF_INET;
   sin.sin_addr = addr;
   sin.sin_port = htons (port);
 
   if (bind (listen_fd, (struct sockaddr *) &sin, sizeof sin) == -1)
     fatal ("Error in bind: %s", strerror (errno));
 
   if (listen (listen_fd, 1) == -1)
     fatal ("Error in listen: %s", strerror (errno));
 
   client_fd = accept (listen_fd, (struct sockaddr *) &sin2, &sin2_size);
   if (client_fd == -1)
     fatal ("Error in accept: %s", strerror (errno));
 
   fprintf (stderr, "Server accepted connection from client\n");
 
   /* Only one client is ever served, so the listening socket is no longer
      needed.  */
   close (listen_fd);
 
   sleep (1);
   close (client_fd);  /* Shut it down.  */
   fprintf (stderr, "Server closed connection\n");
 }
 
 /* Look up the TCP protocol number, then fork: the child becomes a
    one-shot server on 127.0.0.1:32767 and the parent, after giving the
    server a second to start, connects to it and polls the connection.  */
 int
 main (void)
 {
   static char const protocol_name[] = "tcp";
   auto struct in_addr addr;
   register int const port = 32767;
   register pid_t pid;
 
   if ((tcp_proto = getprotobyname (protocol_name)) == NULL)
     fatal ("Cannot find number for protocol: %s", protocol_name);
 
   inet_aton ("127.0.0.1", &addr);
 
   if ((pid = fork ()) == -1)
     fatal ("Error in fork: %s", strerror (errno));
 
   /* fork(2) returns the child's pid (non-zero) in the parent and 0 in the
      child.  */
   if (pid != 0)
     {
       sleep (1);  /* Give server time to start up. */
       do_connect_and_poll (addr, port);	/* Parent process does this. */
     }
   else
     do_listen_sleep_and_die (addr, port);  /* Child process does this.  */
 
   return 0;
 }
 ========================================================================
>Unformatted:
