From iedowse@maths.tcd.ie Mon Aug  9 15:30:13 1999
Return-Path: <iedowse@maths.tcd.ie>
Received: from salmon.maths.tcd.ie (salmon.maths.tcd.ie [134.226.81.11])
	by hub.freebsd.org (Postfix) with SMTP id 8574E14EED
	for <FreeBSD-gnats-submit@freebsd.org>; Mon,  9 Aug 1999 15:30:07 -0700 (PDT)
	(envelope-from iedowse@maths.tcd.ie)
Message-Id: <199908092326.aa93394@salmon.maths.tcd.ie>
Date: Mon, 9 Aug 1999 23:26:34 +0100 (BST)
From: iedowse@maths.tcd.ie
Sender: iedowse@maths.tcd.ie
Reply-To: iedowse@maths.tcd.ie
To: FreeBSD-gnats-submit@freebsd.org
Subject: [PATCH] NFS replies with incorrect source IP
X-Send-Pr-Version: 3.2

>Number:         13049
>Category:       kern
>Synopsis:       [PATCH] NFS replies with incorrect source IP
>Confidential:   no
>Severity:       serious
>Priority:       low
>Responsible:    dillon
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Mon Aug  9 15:30:01 PDT 1999
>Closed-Date:    Thu Nov 11 09:37:10 PST 1999
>Last-Modified:  Sun Nov 14 17:40:00 PST 1999
>Originator:     Ian Dowse
>Release:        FreeBSD 3.2-STABLE i386
>Organization:
		School of Mathematics
		Trinity College, Dublin
>Environment:

	FreeBSD -current and -stable

>Description:

	As described in PRs kern/2858, kern/5964, kern/6412, kern/9612,
	and possibly others, NFS can send replies using the wrong source
	IP address.

	NFS over UDP uses just one socket to send and receive, so it can
	neither determine at which local IP a request was directed, nor
	control from which IP the reply is sent. The patches below fix
	this problem by setting up one UDP socket for each local IP
	address.

	These patches are against -current, but we have been using them
	on a number of busy 3.2-stable machines without any problems for
	over a month.

	One unfortunate side-effect of these changes is that if the new
	nfsd is run with an old kernel, a panic will result. This is
	because of an old bug in nfssvc_addsock() - see the 'mynam != NULL'
	change below. It is safe to run the old nfsd with the new kernel.

>How-To-Repeat:
>Fix:
	

--- nfs_syscalls.c.orig	Sat Aug  7 20:54:46 1999
+++ nfs_syscalls.c	Sat Aug  7 20:54:12 1999
@@ -86,7 +86,7 @@
 extern struct nfsstats nfsstats;
 extern int nfsrvw_procrastinate;
 extern int nfsrvw_procrastinate_v3;
-struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
+struct nfssvc_sock *nfs_cltpsock;
 static int nuidhash_max = NFS_MAXUIDHASH;
 
 #ifndef NFS_NOSERVER
@@ -372,28 +372,24 @@
 	/*
 	 * Add it to the list, as required.
 	 */
-	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
-		tslp = nfs_udpsock;
-		if (tslp->ns_flag & SLP_VALID) {
-			FREE(mynam, M_SONAME);
-			return (EPERM);
-		}
 #ifdef ISO
-	} else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+	if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
 		tslp = nfs_cltpsock;
 		if (tslp->ns_flag & SLP_VALID) {
-			FREE(mynam, M_SONAME);
+			if (mynam != NULL)
+				FREE(mynam, M_SONAME);
 			return (EPERM);
 		}
-#endif /* ISO */
 	}
+#endif /* ISO */
 	if (so->so_type == SOCK_STREAM)
 		siz = NFS_MAXPACKET + sizeof (u_long);
 	else
 		siz = NFS_MAXPACKET;
 	error = soreserve(so, siz, siz);
 	if (error) {
-		FREE(mynam, M_SONAME);
+		if (mynam != NULL)
+			FREE(mynam, M_SONAME);
 		return (error);
 	}
 
@@ -898,13 +894,6 @@
 
 	TAILQ_INIT(&nfsd_head);
 	nfsd_head_flag &= ~NFSD_CHECKSLP;
-
-	nfs_udpsock = (struct nfssvc_sock *)
-	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
-	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
-	STAILQ_INIT(&nfs_udpsock->ns_rec);
-	TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
-	TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
 
 	nfs_cltpsock = (struct nfssvc_sock *)
 	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
--- nfs_nqlease.c.orig	Sat Aug  7 20:55:12 1999
+++ nfs_nqlease.c	Sat Aug  7 20:53:51 1999
@@ -138,7 +138,7 @@
 
 extern nfstype nfsv2_type[9];
 extern nfstype nfsv3_type[9];
-extern struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
+extern struct nfssvc_sock *nfs_cltpsock;
 extern int nfsd_waiting;
 extern struct nfsstats nfsstats;
 
@@ -384,11 +384,13 @@
 
 	if (slp == NQLOCALSLP)
 		lph->lph_flag |= (LC_VALID | LC_LOCAL);
-	else if (slp == nfs_udpsock) {
+	else if (slp->ns_so->so_proto->pr_protocol == IPPROTO_UDP) {
 		saddr = (struct sockaddr_in *)nam;
-		lph->lph_flag |= (LC_VALID | LC_UDP);
+		lph->lph_flag |= (LC_VALID | LC_UDP | LC_SREF);
 		lph->lph_inetaddr = saddr->sin_addr.s_addr;
 		lph->lph_port = saddr->sin_port;
+		lph->lph_slp = slp;
+		slp->ns_sref++;
 	} else if (slp == nfs_cltpsock) {
 		lph->lph_nam = dup_sockaddr(nam, 1);
 		lph->lph_flag |= (LC_VALID | LC_CLTP);
@@ -458,7 +460,8 @@
 		else
 			return (0);
 	}
-	if (slp == nfs_udpsock || slp == nfs_cltpsock)
+	if (slp->ns_so->so_proto->pr_protocol == IPPROTO_UDP ||
+	    slp == nfs_cltpsock)
 		addr = nam;
 	else
 		addr = slp->ns_nam;
@@ -517,7 +520,7 @@
 				saddr->sin_family = AF_INET;
 				saddr->sin_addr.s_addr = lph->lph_inetaddr;
 				saddr->sin_port = lph->lph_port;
-				so = nfs_udpsock->ns_so;
+				so = lph->lph_slp->ns_so;
 			} else if (lph->lph_flag & LC_CLTP) {
 				nam2 = lph->lph_nam;
 				so = nfs_cltpsock->ns_so;
--- nfsd.c.orig	Tue Jun 15 14:15:50 1999
+++ nfsd.c	Sat Aug  7 21:04:01 1999
@@ -52,6 +52,7 @@
 #include <sys/syslog.h>
 #include <sys/wait.h>
 #include <sys/mount.h>
+#include <sys/sysctl.h>
 
 #include <rpc/rpc.h>
 #include <rpc/pmap_clnt.h>
@@ -62,6 +63,9 @@
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <arpa/inet.h>
 
 #ifdef NFSKERB
 #include <kerberosIV/des.h>
@@ -110,6 +114,7 @@
 #endif
 #endif
 void	usage __P((void));
+int	get_ifaddrs __P((struct in_addr **));
 
 /*
  * Nfs server daemon mostly just a user context for nfssvc()
@@ -143,7 +148,7 @@
 #endif
 	fd_set ready, sockbits;
 	int ch, cltpflag, connect_type_cnt, i, len, maxsock, msgsock;
-	int nfsdcnt, nfssvc_flag, on, reregister, sock, tcpflag, tcpsock;
+	int nfsdcnt, nfssvc_flag, on, reregister, tcpflag, tcpsock;
 	int tp4cnt, tp4flag, tpipcnt, tpipflag, udpflag;
 #ifdef notyet
 	int tp4sock, tpipsock;
@@ -375,32 +380,56 @@
 
 	/* If we are serving udp, set up the socket. */
 	if (udpflag) {
-		if ((sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
-			syslog(LOG_ERR, "can't create udp socket");
-			exit(1);
-		}
-		inetaddr.sin_family = AF_INET;
-		inetaddr.sin_addr.s_addr = INADDR_ANY;
-		inetaddr.sin_port = htons(NFS_PORT);
-		inetaddr.sin_len = sizeof(inetaddr);
-		if (bind(sock,
-		    (struct sockaddr *)&inetaddr, sizeof(inetaddr)) < 0) {
-			syslog(LOG_ERR, "can't bind udp addr");
-			exit(1);
+		struct in_addr *ifaddr_list;
+		int i;
+		int ifaddr_count;
+
+
+		ifaddr_count = get_ifaddrs(&ifaddr_list);
+
+		for (i = 0; i < ifaddr_count; i++) {
+			int on = 1;
+			int sock;
+
+			if ((sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+				syslog(LOG_ERR, "can't create udp socket: %m");
+				exit(1);
+			}
+			if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on,
+			    sizeof(on)) != 0) {
+				syslog(LOG_ERR, "setsockopt failed: %m");
+				exit(1);
+			}
+			bzero(&inetaddr, sizeof(inetaddr));
+			inetaddr.sin_family = AF_INET;
+			inetaddr.sin_addr.s_addr = ifaddr_list[i].s_addr;
+			inetaddr.sin_port = htons(NFS_PORT);
+			inetaddr.sin_len = sizeof(inetaddr);
+			if (bind(sock, (struct sockaddr *)&inetaddr,
+			    sizeof(inetaddr)) < 0) {
+				syslog(LOG_ERR, "can't bind udp addr %s: %m",
+				    ifaddr_list[i].s_addr == INADDR_ANY ?
+				    "INADDR_ANY" : inet_ntoa(ifaddr_list[i]));
+				exit(1);
+			}
+
+			nfsdargs.sock = sock;
+			nfsdargs.name = NULL;
+			nfsdargs.namelen = 0;
+			if (nfssvc(NFSSVC_ADDSOCK, &nfsdargs) < 0) {
+				syslog(LOG_ERR, "can't add UDP socket: %m");
+				exit(1);
+			}
+			(void)close(sock);
 		}
+
+		free(ifaddr_list);
+
 		if (!pmap_set(RPCPROG_NFS, 2, IPPROTO_UDP, NFS_PORT) ||
 		    !pmap_set(RPCPROG_NFS, 3, IPPROTO_UDP, NFS_PORT)) {
 			syslog(LOG_ERR, "can't register with udp portmap");
 			exit(1);
 		}
-		nfsdargs.sock = sock;
-		nfsdargs.name = NULL;
-		nfsdargs.namelen = 0;
-		if (nfssvc(NFSSVC_ADDSOCK, &nfsdargs) < 0) {
-			syslog(LOG_ERR, "can't Add UDP socket");
-			exit(1);
-		}
-		(void)close(sock);
 	}
 
 #ifdef ISO
@@ -671,3 +700,106 @@
 }
 #endif	/* __FreeBSD__ */
 #endif
+
+
+#define ROUNDUP(a) \
+	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
+#define ADVANCE(x) (x += ROUNDUP(((struct sockaddr *)(x))->sa_len))
+
+/*
+ * Get a list of local IP addresses, and store a pointer to the malloc'd
+ * list in *ifaddrp. The first address will be INADDR_ANY as a fallback entry.
+ * The number of entries in the list is returned.
+ */
+int get_ifaddrs(struct in_addr **ifaddrp) {
+	register struct if_msghdr *ifm;
+	register struct ifa_msghdr *ifam;
+	struct in_addr *ifaddr;
+	size_t needed;
+	int mib[6], flags = 0;
+	int adr_count;
+	char *buf, *lim, *next;
+
+	/* Create first address as wildcard */
+	ifaddr = malloc(sizeof(*ifaddr));
+	if (ifaddr == NULL) {
+		syslog(LOG_ERR, "malloc failed");
+		exit(1);
+	}
+	ifaddr->s_addr = INADDR_ANY;
+	adr_count = 1;
+
+	mib[0] = CTL_NET;
+	mib[1] = PF_ROUTE;
+	mib[2] = 0;
+	mib[3] = AF_INET;
+	mib[4] = NET_RT_IFLIST;
+	mib[5] = 0;
+	if (sysctl(mib, 6, NULL, &needed, NULL, 0) < 0) {
+		syslog(LOG_ERR, "route-sysctl-estimate failed: %m");
+		exit(1);
+	}
+	if ((buf = malloc(needed)) == NULL) {
+		syslog(LOG_ERR, "malloc failed");
+		exit(1);
+	}
+	if (sysctl(mib, 6, buf, &needed, NULL, 0) < 0) {
+		syslog(LOG_ERR, "retrieval of interface table failed: %m");
+		exit(1);
+	}
+	lim = buf + needed;
+
+	for (next = buf; next < lim; next += ifm->ifm_msglen) {
+		int i;
+		char *cp, *cplim;
+		struct sockaddr *sa;
+		struct sockaddr_in *sin;
+
+		ifm = (struct if_msghdr *)next;
+		if (ifm->ifm_type == RTM_IFINFO) {
+			flags = ifm->ifm_flags;
+			continue;
+		}
+		if ((flags & IFF_UP) == 0)
+			continue;
+		if (ifm->ifm_type != RTM_NEWADDR)
+			errx(1, "out of sync parsing NET_RT_IFLIST");
+		ifam = (struct ifa_msghdr *)ifm;
+		cp = (char *)(ifam + 1);
+		cplim = ifam->ifam_msglen + (char *)ifam;
+
+		/* Find the RTAX_IFA entry */
+		for (i = 0; i < RTAX_IFA && cp < cplim; i++)
+			if (ifam->ifam_addrs & (1 << i))
+				ADVANCE(cp);
+
+		if (i != RTAX_IFA || (ifam->ifam_addrs & (1 << RTAX_IFA)) == 0)
+			continue;
+
+		sa = (struct sockaddr *)cp;
+		if (sa->sa_family != AF_INET)
+			continue;
+
+		sin = (struct sockaddr_in *)sa;
+
+		/* Check if we've seen this address before */
+		for (i = 0; i < adr_count; i++)
+			if (ifaddr[i].s_addr == sin->sin_addr.s_addr)
+				break;
+
+		if (i < adr_count)
+			continue;
+
+		adr_count++;
+		ifaddr = realloc(ifaddr, sizeof(*ifaddr) * adr_count);
+		if (ifaddr == NULL) {
+			syslog(LOG_ERR, "realloc failed");
+			exit(1);
+		}
+		ifaddr[adr_count - 1] = sin->sin_addr;
+	}
+	free(buf);
+
+	*ifaddrp = ifaddr;
+	return adr_count;
+}

>Release-Note:
>Audit-Trail:

From: Bill Fenner <fenner@research.att.com>
To: iedowse@maths.tcd.ie
Cc: freebsd-gnats-submit@freebsd.org
Subject: Re: kern/13049: [PATCH] NFS replies with incorrect source IP
Date: Thu, 9 Sep 1999 18:30:11 -0700

 >	NFS over UDP uses just one socket to send and receive, so it can
 >	neither determine at which local IP a request was directed
 
 Can't it use IP_RECVDSTADDR?
 
 >	nor control from which IP the reply is sent.
 
 You could rebind the socket before & after sending each reply.  That's
 probably a high enough overhead that multiple sockets is the right way
 to go anyway.
 
   Bill
 
Responsible-Changed-From-To: freebsd-bugs->dillon 
Responsible-Changed-By: dillon 
Responsible-Changed-When: Mon Nov 8 10:53:12 PST 1999 
Responsible-Changed-Why:  
I'm working on a solution to the problem.  Having nfsd simply bind itself 
to every possible interface ip will blow up installations that make heavy 
use of IP aliases.  All we really need to do is add an option to nfsd to 
allow it to bind to a specific IP.  If a sysop wishes to bind to multiple 
IPs the sysop can simply run several nfsd's.  It would be very rare for a
sysop to want to bind to all interface IPs, since this problem most often
comes up when you want to bind NFS to one interface but not another and
protect it with ipfw.

I will do a separate commit right now on the bug fixes submitted by the PR. 
State-Changed-From-To: open->closed 
State-Changed-By: dillon 
State-Changed-When: Thu Nov 11 09:37:10 PST 1999 
State-Changed-Why:  
Committed changes into -current, to the NFS driver in the kernel and to the
nfsd utility, that allow nfsd to be bound to specific IP addresses.

From: Garrett Wollman <wollman@khavrinen.lcs.mit.edu>
To: <dillon@FreeBSD.ORG>
Cc: freebsd-gnats-submit@FreeBSD.ORG
Subject: Re: kern/13049: [PATCH] NFS replies with incorrect source IP
Date: Sun, 14 Nov 1999 20:35:15 -0500 (EST)

 <<On Mon, 8 Nov 1999 10:59:50 -0800 (PST), <dillon@FreeBSD.ORG> said:
 
 > I'm working on a solution to the problem.  Having nfsd simply bind itself
 > to every possible interface ip will blow up installations that make heavy
 > use of IP aliases.  All we really need to do is add an option to nfsd to
 > allow it to bind to a specific IP.
 
 Actually, the Right Thing (significantly harder than what you're
 suggesting) is to:
 
 1) Provide an IP_HDRINCL- or IP_RECVDSTADDR-equivalent option for
 UDP on the sending side.
 
 2) Make NFS keep track of the original destination address and use
 said option (/control message) when sending replies.
 
 The same approach will fix named, dhcpd, Kerberos' KDC and admin
 server, and doubtless many other datagram-based servers which
 currently must open and bind zillions of sockets in order to work
 around the lack of this functionality (which I believe is a bug).
 SOCK_RAW ought to work with UDP as well.
 
 -GAWollman
 
 --
 Garrett A. Wollman   | O Siem / We are all family / O Siem / We're all the same
 wollman@lcs.mit.edu  | O Siem / The fires of freedom 
 Opinions not those of| Dance in the burning flame
 MIT, LCS, CRS, or NSA|                     - Susan Aglukark and Chad Irschick
 
>Unformatted:
