From nobody@FreeBSD.org  Fri Mar  8 19:37:26 2013
Return-Path: <nobody@FreeBSD.org>
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1])
	by hub.freebsd.org (Postfix) with ESMTP id C7AE4272
	for <freebsd-gnats-submit@FreeBSD.org>; Fri,  8 Mar 2013 19:37:26 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from red.freebsd.org (red.freebsd.org [IPv6:2001:4f8:fff6::22])
	by mx1.freebsd.org (Postfix) with ESMTP id B861FCF3
	for <freebsd-gnats-submit@FreeBSD.org>; Fri,  8 Mar 2013 19:37:26 +0000 (UTC)
Received: from red.freebsd.org (localhost [127.0.0.1])
	by red.freebsd.org (8.14.5/8.14.5) with ESMTP id r28JbPoT085971
	for <freebsd-gnats-submit@FreeBSD.org>; Fri, 8 Mar 2013 19:37:25 GMT
	(envelope-from nobody@red.freebsd.org)
Received: (from nobody@localhost)
	by red.freebsd.org (8.14.5/8.14.5/Submit) id r28JbPd4085970;
	Fri, 8 Mar 2013 19:37:25 GMT
	(envelope-from nobody)
Message-Id: <201303081937.r28JbPd4085970@red.freebsd.org>
Date: Fri, 8 Mar 2013 19:37:25 GMT
From: Kajetan Staszkiewicz <vegeta@tuxpowered.net>
To: freebsd-gnats-submit@FreeBSD.org
Subject: Removing pf Source entries locks kernel.
X-Send-Pr-Version: www-3.1
X-GNATS-Notify:

>Number:         176763
>Category:       kern
>Synopsis:       [pf] [patch] Removing pf Source entries locks kernel.
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    glebius
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Fri Mar 08 19:40:01 UTC 2013
>Closed-Date:    Wed Jan 22 10:29:33 UTC 2014
>Last-Modified:  Wed Jan 22 10:30:00 UTC 2014
>Originator:     Kajetan Staszkiewicz
>Release:        9.1-RELEASE
>Organization:
InnoGames GmbH
>Environment:
FreeBSD xxxxxxx 9.1-RELEASE FreeBSD 9.1-RELEASE #10 r247265M: Mon Feb 25 14:58:39 CET 2013     root@xxxxxxx:/usr/obj/usr/src/sys/IGLB3  amd64
>Description:
The problem occurs on a FreeBSD router with a large number of pf State and Source entries when pfctl is used to remove some Sources: for each matching Source, the whole State table is searched element by element.

With hundreds of matching Sources (and hundreds of thousands of States to search through) this can freeze the kernel for a few seconds. Under some conditions (e.g. a DDoS attack hitting the IP address for which the Source entries will be removed), the kernel will freeze permanently.
>How-To-Repeat:
`pfctl -K`
>Fix:
Create a list of State entries for each Source and attach it to that Source. This way, only the Source's own short list has to be searched when the Source is removed. The list is maintained during State creation and removal.

Patch attached with submission follows:

--- sys/contrib/pf/net/pf.c.orig	2013-03-05 11:27:01.000000000 +0100
+++ sys/contrib/pf/net/pf.c	2013-03-08 14:14:39.000000000 +0100
@@ -1517,6 +1517,19 @@
 	u_int32_t timeout;
 
 	if (s->src_node != NULL) {
+
+		/* Remove this pf_state from the list of states linked to pf_src_node */
+		if (s->prev_state) /* not the first pf_state in list */
+			s->prev_state->next_state = s->next_state;
+		else /* the fist pf_state in the list, modify list head in pf_src_node */
+			s->src_node->linked_states = s->next_state;
+	
+		if (s->next_state) /* not the last pf_state in list */
+			s->next_state->prev_state = s->prev_state;
+
+		s->prev_state = NULL;
+		s->next_state = NULL;
+
 		if (s->src.tcp_est)
 			--s->src_node->conn;
 		if (--s->src_node->states <= 0) {
@@ -1532,6 +1545,19 @@
 		}
 	}
 	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
+
+		/* Remove this pf_state from the list of states linked to pf_src_node */
+		if (s->prev_state) /* not the first pf_state in list */
+			s->prev_state->next_state = s->next_state;
+		else /* the fist pf_state in the list, modify list head in pf_src_node */
+			s->nat_src_node->linked_states = s->next_state;
+	
+		if (s->next_state) /* not the last pf_state in list */
+			s->next_state->prev_state = s->prev_state;
+
+		s->prev_state = NULL;
+		s->next_state = NULL;
+
 		if (--s->nat_src_node->states <= 0) {
 			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
 			if (!timeout)
@@ -3895,12 +3921,24 @@
 	if (sn != NULL) {
 		s->src_node = sn;
 		s->src_node->states++;
+
+		/* attach this state to head of list */
+		s->next_state = sn->linked_states;
+		if (s->next_state)
+			s->next_state->prev_state = s;
+		sn->linked_states = s;
 	}
 	if (nsn != NULL) {
 		/* XXX We only modify one side for now. */
 		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
 		s->nat_src_node = nsn;
 		s->nat_src_node->states++;
+
+		/* attach this state to head of list */
+		s->next_state = nsn->linked_states;
+		if (s->next_state)
+			s->next_state->prev_state = s;
+		nsn->linked_states = s;
 	}
 	if (pd->proto == IPPROTO_TCP) {
 		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
--- sys/contrib/pf/net/pf_ioctl.c.orig	2013-03-05 11:26:44.000000000 +0100
+++ sys/contrib/pf/net/pf_ioctl.c	2013-03-08 14:10:08.000000000 +0100
@@ -3790,6 +3790,7 @@
 	case DIOCKILLSRCNODES: {
 		struct pf_src_node	*sn;
 		struct pf_state		*s;
+		struct pf_state		**pns; /* pointer to next_state of previous state */
 		struct pfioc_src_node_kill *psnk =
 		    (struct pfioc_src_node_kill *)addr;
 		u_int			killed = 0;
@@ -3808,20 +3809,17 @@
 				&psnk->psnk_dst.addr.v.a.mask,
 				&sn->raddr, sn->af)) {
 				/* Handle state to src_node linkage */
-				if (sn->states != 0) {
-					RB_FOREACH(s, pf_state_tree_id,
-#ifdef __FreeBSD__
-					    &V_tree_id) {
-#else
-					    &tree_id) {
-#endif
-						if (s->src_node == sn)
-							s->src_node = NULL;
-						if (s->nat_src_node == sn)
-							s->nat_src_node = NULL;
-					}
-					sn->states = 0;
+				s = NULL; /* make gcc happy */
+				pns = &sn->linked_states;
+				for (s = sn->linked_states; s != NULL; s = s->next_state) {
+					s->src_node = NULL;
+					s->nat_src_node = NULL;
+					*pns = NULL;
+					s->prev_state = NULL;
+					pns = &s->next_state;
 				}
+				*pns = NULL;
+				sn->states = 0;
 				sn->expire = 1;
 				killed++;
 			}
--- sys/contrib/pf/net/pfvar.h.orig	2013-03-05 11:27:14.000000000 +0100
+++ sys/contrib/pf/net/pfvar.h	2013-03-08 11:02:29.000000000 +0100
@@ -748,6 +748,7 @@
 	u_int32_t	 expire;
 	sa_family_t	 af;
 	u_int8_t	 ruletype;
+	struct pf_state  *linked_states;
 };
 
 #define PFSNODE_HIWAT		10000	/* default source node table size */
@@ -852,6 +853,8 @@
 	struct pfi_kif		*rt_kif;
 	struct pf_src_node	*src_node;
 	struct pf_src_node	*nat_src_node;
+	struct pf_state		*prev_state;
+	struct pf_state		*next_state;
 	u_int64_t		 packets[2];
 	u_int64_t		 bytes[2];
 	u_int32_t		 creation;


>Release-Note:
>Audit-Trail:
Responsible-Changed-From-To: freebsd-bugs->freebsd-pf 
Responsible-Changed-By: linimon 
Responsible-Changed-When: Sun Mar 10 04:31:13 UTC 2013 
Responsible-Changed-Why:  
Over to maintainer(s). 

http://www.freebsd.org/cgi/query-pr.cgi?pr=176763 

From: Kajetan Staszkiewicz <vegeta@tuxpowered.net>
To: bug-followup@freebsd.org
Cc:  
Subject: Re: kern/176763: [pf] [patch] Removing pf Source entries locks kernel.
Date: Mon, 18 Nov 2013 18:12:36 +0100

 --Boundary-00=_EqkiSNwqoUJOzB0
 Content-Type: Text/Plain;
   charset="us-ascii"
 Content-Transfer-Encoding: 7bit
 
 The attached patch is for FreeBSD 10. It adds a new parameter "-c" to pfctl
 which, when killing src_nodes, also kills the states linked to the found nodes.
 
 -- 
 | pozdrawiam / greetings | powered by Debian, FreeBSD and CentOS |
 |  Kajetan Staszkiewicz  | jabber,email: vegeta()tuxpowered net  |
 |        Vegeta          | www: http://vegeta.tuxpowered.net     |
 `------------------------^---------------------------------------'
 
 --Boundary-00=_EqkiSNwqoUJOzB0
 Content-Type: text/x-patch;
   charset="UTF-8";
   name="link-states-to-src-nodes.patch"
 Content-Transfer-Encoding: 7bit
 Content-Disposition: attachment;
 	filename="link-states-to-src-nodes.patch"
 
 # Removing src_nodes causes the list of states to be fully searched through
 # to find ones linked to the given src_node. With large amount of src_nodes
 # and states (for example when under a DDoS attack) this operation can take
 # many seconds to complete.
 #
 # Provide a list of states linked to each src_node and use the list to make
 # the operation faster. Add new parameter "-c" to pfctl which, when
 # killing src_nodes, also kills states linked to found nodes.
 #
 # kajetan.staszkiewicz@innogames.de
 # Work sponsored by InnoGames GmbH
 #
 diff --git a/sbin/pfctl/pfctl.8 b/sbin/pfctl/pfctl.8
 index 5c0e7b3..61c5711 100644
 --- a/sbin/pfctl/pfctl.8
 +++ b/sbin/pfctl/pfctl.8
 @@ -42,7 +42,8 @@
  .Op Fl F Ar modifier
  .Op Fl f Ar file
  .Op Fl i Ar interface
 -.Op Fl K Ar host | network
 +.Oo Fl K Ar host | network
 +.Op Fl c Oc
  .Xo
  .Oo Fl k
  .Ar host | network | label | id
 @@ -189,6 +190,10 @@ as the anchor name:
  .Bd -literal -offset indent
  # pfctl -a '*' -sr
  .Ed
 +.It Fl c
 +When removing source tracking entries, remove state entries linked to
 +them. This option can be only used in conjunction with
 +.Fl K .
  .It Fl D Ar macro Ns = Ns Ar value
  Define
  .Ar macro
 diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c
 index 90a2bb5..6a2dd90 100644
 --- a/sbin/pfctl/pfctl.c
 +++ b/sbin/pfctl/pfctl.c
 @@ -236,7 +236,7 @@ usage(void)
  
  	fprintf(stderr, "usage: %s [-AdeghmNnOPqRrvz] ", __progname);
  	fprintf(stderr, "[-a anchor] [-D macro=value] [-F modifier]\n");
 -	fprintf(stderr, "\t[-f file] [-i interface] [-K host | network]\n");
 +	fprintf(stderr, "\t[-f file] [-i interface] [-K host | network [-c]]\n");
  	fprintf(stderr, "\t[-k host | network | label | id] ");
  	fprintf(stderr, "[-o level] [-p device]\n");
  	fprintf(stderr, "\t[-s modifier] ");
 @@ -449,10 +449,10 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts)
  	struct pfioc_src_node_kill psnk;
  	struct addrinfo *res[2], *resp[2];
  	struct sockaddr last_src, last_dst;
 -	int killed, sources, dests;
 +	int killed, killed_states, sources, dests;
  	int ret_ga;
  
 -	killed = sources = dests = 0;
 +	killed = killed_states = sources = dests = 0;
  
  	memset(&psnk, 0, sizeof(psnk));
  	memset(&psnk.psnk_src.addr.v.a.mask, 0xff,
 @@ -462,6 +462,8 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts)
  
  	pfctl_addrprefix(src_node_kill[0], &psnk.psnk_src.addr.v.a.mask);
  
 +	psnk.psnk_kill_linked_states = opts & PF_OPT_KILLLINKEDSTATES;
 +
  	if ((ret_ga = getaddrinfo(src_node_kill[0], NULL, NULL, &res[0]))) {
  		errx(1, "getaddrinfo: %s", gai_strerror(ret_ga));
  		/* NOTREACHED */
 @@ -529,20 +531,23 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts)
  				if (ioctl(dev, DIOCKILLSRCNODES, &psnk))
  					err(1, "DIOCKILLSRCNODES");
  				killed += psnk.psnk_killed;
 +				killed_states += psnk.psnk_killed_states;
  			}
  			freeaddrinfo(res[1]);
  		} else {
  			if (ioctl(dev, DIOCKILLSRCNODES, &psnk))
  				err(1, "DIOCKILLSRCNODES");
  			killed += psnk.psnk_killed;
 +			killed_states += psnk.psnk_killed_states;
  		}
  	}
  
  	freeaddrinfo(res[0]);
  
  	if ((opts & PF_OPT_QUIET) == 0)
 -		fprintf(stderr, "killed %d src nodes from %d sources and %d "
 -		    "destinations\n", killed, sources, dests);
 +		fprintf(stderr, "killed %d src nodes and %d linked states "
 +		    "from %d sources and %d destinations\n",
 +		    killed, killed_states, sources, dests);
  	return (0);
  }
  
 @@ -2002,11 +2007,14 @@ main(int argc, char *argv[])
  		usage();
  
  	while ((ch = getopt(argc, argv,
 -	    "a:AdD:eqf:F:ghi:k:K:mnNOo:Pp:rRs:t:T:vx:z")) != -1) {
 +	    "a:AcdD:eqf:F:ghi:k:K:mnNOo:Pp:rRs:t:T:vx:z")) != -1) {
  		switch (ch) {
  		case 'a':
  			anchoropt = optarg;
  			break;
 +		case 'c':
 +			opts |= PF_OPT_KILLLINKEDSTATES;
 +			break;
  		case 'd':
  			opts |= PF_OPT_DISABLE;
  			mode = O_RDWR;
 diff --git a/sbin/pfctl/pfctl_parser.h b/sbin/pfctl/pfctl_parser.h
 index 4560d66..b272b0b 100644
 --- a/sbin/pfctl/pfctl_parser.h
 +++ b/sbin/pfctl/pfctl_parser.h
 @@ -51,6 +51,7 @@
  #define PF_OPT_NUMERIC		0x1000
  #define PF_OPT_MERGE		0x2000
  #define PF_OPT_RECURSE		0x4000
 +#define PF_OPT_KILLLINKEDSTATES	0x8000
  
  #define PF_TH_ALL		0xFF
  
 diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
 index c16591b..e5395e3 100644
 --- a/sys/net/pfvar.h
 +++ b/sys/net/pfvar.h
 @@ -697,6 +697,7 @@ struct pf_threshold {
  
  struct pf_src_node {
  	LIST_ENTRY(pf_src_node) entry;
 +	TAILQ_HEAD(, pf_state)	state_list;
  	struct pf_addr	 addr;
  	struct pf_addr	 raddr;
  	union pf_rule_ptr rule;
 @@ -787,6 +788,7 @@ struct pf_state {
  	TAILQ_ENTRY(pf_state)	 sync_list;
  	TAILQ_ENTRY(pf_state)	 key_list[2];
  	LIST_ENTRY(pf_state)	 entry;
 +	TAILQ_ENTRY(pf_state)	 srcnode_link;
  	struct pf_state_peer	 src;
  	struct pf_state_peer	 dst;
  	union pf_rule_ptr	 rule;
 @@ -1445,6 +1447,8 @@ struct pfioc_src_node_kill {
  	struct pf_rule_addr psnk_src;
  	struct pf_rule_addr psnk_dst;
  	u_int		    psnk_killed;
 +	u_int		    psnk_killed_states;
 +	u_int		    psnk_kill_linked_states;
  };
  
  struct pfioc_state_kill {
 diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
 index 2de8c40..9da73c5 100644
 --- a/sys/netpfil/pf/pf.c
 +++ b/sys/netpfil/pf/pf.c
 @@ -652,6 +652,8 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
  		    rule->max_src_conn_rate.limit,
  		    rule->max_src_conn_rate.seconds);
  
 +		TAILQ_INIT(&(*sn)->state_list);
 +
  		(*sn)->af = af;
  		(*sn)->rule.ptr = rule;
  		PF_ACPY(&(*sn)->addr, src, af);
 @@ -1482,6 +1484,7 @@ static void
  pf_src_tree_remove_state(struct pf_state *s)
  {
  	u_int32_t timeout;
 +	struct pf_srchash *sh = NULL;
  
  	if (s->src_node != NULL) {
  		if (s->src.tcp_est)
 @@ -1493,6 +1496,12 @@ pf_src_tree_remove_state(struct pf_state *s)
  				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
  			s->src_node->expire = time_uptime + timeout;
  		}
 +		sh = &V_pf_srchash[pf_hashsrc(&s->src_node->addr, s->src_node->af)];
 +		PF_HASHROW_LOCK(sh);
 +		if (!TAILQ_EMPTY(&s->src_node->state_list))
 +			TAILQ_REMOVE(&s->src_node->state_list, s, srcnode_link);
 +		PF_HASHROW_UNLOCK(sh);
 +
  	}
  	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
  		if (--s->nat_src_node->states <= 0) {
 @@ -1502,6 +1511,11 @@ pf_src_tree_remove_state(struct pf_state *s)
  				    V_pf_default_rule.timeout[PFTM_SRC_NODE];
  			s->nat_src_node->expire = time_uptime + timeout;
  		}
 +		sh = &V_pf_srchash[pf_hashsrc(&s->nat_src_node->addr, s->nat_src_node->af)];
 +		PF_HASHROW_LOCK(sh);
 +		if (!TAILQ_EMPTY(&s->nat_src_node->state_list))
 +			TAILQ_REMOVE(&s->nat_src_node->state_list, s, srcnode_link);
 +		PF_HASHROW_UNLOCK(sh);
  	}
  	s->src_node = s->nat_src_node = NULL;
  }
 @@ -3407,6 +3421,7 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
      int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen)
  {
  	struct pf_state		*s = NULL;
 +	struct pf_srchash	*sh = NULL;
  	struct pf_src_node	*sn = NULL;
  	struct tcphdr		*th = pd->hdr.tcp;
  	u_int16_t		 mss = V_tcp_mssdflt;
 @@ -3505,14 +3520,22 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
  	s->expire = time_uptime;
  
  	if (sn != NULL) {
 +		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
 +		PF_HASHROW_LOCK(sh);
  		s->src_node = sn;
  		s->src_node->states++;
 +		TAILQ_INSERT_HEAD(&sn->state_list, s, srcnode_link);
 +		PF_HASHROW_UNLOCK(sh);
  	}
  	if (nsn != NULL) {
  		/* XXX We only modify one side for now. */
 +		sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)];
 +		PF_HASHROW_LOCK(sh);
  		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
  		s->nat_src_node = nsn;
  		s->nat_src_node->states++;
 +		TAILQ_INSERT_HEAD(&nsn->state_list, s, srcnode_link);
 +		PF_HASHROW_UNLOCK(sh);
  	}
  	if (pd->proto == IPPROTO_TCP) {
  		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
 diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
 index 2b0f2cd..0267ef0 100644
 --- a/sys/netpfil/pf/pf_ioctl.c
 +++ b/sys/netpfil/pf/pf_ioctl.c
 @@ -150,7 +150,8 @@ struct cdev *pf_dev;
   */
  static void		 pf_clear_states(void);
  static int		 pf_clear_tables(void);
 -static void		 pf_clear_srcnodes(struct pf_src_node *);
 +static u_int32_t	 pf_clear_srcnodes(struct pf_src_node *,
 +    int kill_states);
  static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
  
  /*
 @@ -3134,7 +3135,7 @@ DIOCCHANGEADDR_error:
  
  	case DIOCCLRSRCNODES: {
  
 -		pf_clear_srcnodes(NULL);
 +		pf_clear_srcnodes(NULL, 0);
  		pf_purge_expired_src_nodes();
  		V_pf_status.src_nodes = 0;
  		break;
 @@ -3145,7 +3146,7 @@ DIOCCHANGEADDR_error:
  		    (struct pfioc_src_node_kill *)addr;
  		struct pf_srchash	*sh;
  		struct pf_src_node	*sn;
 -		u_int			i, killed = 0;
 +		u_int			i, killed = 0, killed_states = 0;
  
  		for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask;
  		    i++, sh++) {
 @@ -3166,7 +3167,7 @@ DIOCCHANGEADDR_error:
  				&sn->raddr, sn->af)) {
  				/* Handle state to src_node linkage */
  				if (sn->states != 0)
 -					pf_clear_srcnodes(sn);
 +					killed_states += pf_clear_srcnodes(sn,  psnk->psnk_kill_linked_states);
  				sn->expire = 1;
  				killed++;
  			}
 @@ -3177,6 +3178,7 @@ DIOCCHANGEADDR_error:
  			pf_purge_expired_src_nodes();
  
  		psnk->psnk_killed = killed;
 +		psnk->psnk_killed_states = killed_states;
  		break;
  	}
  
 @@ -3360,24 +3362,12 @@ pf_clear_tables(void)
  	return (error);
  }
  
 -static void
 -pf_clear_srcnodes(struct pf_src_node *n)
 +static u_int32_t
 +pf_clear_srcnodes(struct pf_src_node *n, int kill_states)
  {
  	struct pf_state *s;
  	int i;
 -
 -	for (i = 0; i <= V_pf_hashmask; i++) {
 -		struct pf_idhash *ih = &V_pf_idhash[i];
 -
 -		PF_HASHROW_LOCK(ih);
 -		LIST_FOREACH(s, &ih->states, entry) {
 -			if (n == NULL || n == s->src_node)
 -				s->src_node = NULL;
 -			if (n == NULL || n == s->nat_src_node)
 -				s->nat_src_node = NULL;
 -		}
 -		PF_HASHROW_UNLOCK(ih);
 -	}
 +	int killed_states = 0;
  
  	if (n == NULL) {
  		struct pf_srchash *sh;
 @@ -3386,6 +3376,19 @@ pf_clear_srcnodes(struct pf_src_node *n)
  		    i++, sh++) {
  			PF_HASHROW_LOCK(sh);
  			LIST_FOREACH(n, &sh->nodes, entry) {
 +				while (!TAILQ_EMPTY(&n->state_list)) {
 +					s = TAILQ_FIRST(&n->state_list);
 +					if (kill_states) {
 +						pf_unlink_state(s, 0);
 +						killed_states++;
 +					} else {
 +						PF_STATE_LOCK(s);
 +						TAILQ_REMOVE(&n->state_list, s, srcnode_link);
 +						s->src_node = NULL;
 +						s->nat_src_node = NULL;
 +						PF_STATE_UNLOCK(s);
 +					}
 +				}
  				n->expire = 1;
  				n->states = 0;
  			}
 @@ -3393,9 +3396,24 @@ pf_clear_srcnodes(struct pf_src_node *n)
  		}
  	} else {
  		/* XXX: hash slot should already be locked here. */
 +		while (!TAILQ_EMPTY(&n->state_list)) {
 +			s = TAILQ_FIRST(&n->state_list);
 +			if (kill_states) {
 +				pf_unlink_state(s, 0);
 +				killed_states++;
 +			} else {
 +				PF_STATE_LOCK(s);
 +				TAILQ_REMOVE(&n->state_list, s, srcnode_link);
 +				s->src_node = NULL;
 +				s->nat_src_node = NULL;
 +				PF_STATE_UNLOCK(s);
 +			}
 +		}
  		n->expire = 1;
  		n->states = 0;
  	}
 +
 +	return killed_states;
  }
  /*
   * XXX - Check for version missmatch!!!
 @@ -3459,7 +3477,7 @@ shutdown_pf(void)
  
  		pf_clear_states();
  
 -		pf_clear_srcnodes(NULL);
 +		pf_clear_srcnodes(NULL, 0);
  
  		/* status does not use malloced mem so no need to cleanup */
  		/* fingerprints and interfaces have thier own cleanup code */
 
 --Boundary-00=_EqkiSNwqoUJOzB0--
State-Changed-From-To: open->patched 
State-Changed-By: glebius 
State-Changed-When: Fri Nov 22 19:14:57 UTC 2013 
State-Changed-Why:  
Fixed in head, thanks! 


Responsible-Changed-From-To: freebsd-pf->glebius 
Responsible-Changed-By: glebius 
Responsible-Changed-When: Fri Nov 22 19:14:57 UTC 2013 
Responsible-Changed-Why:  
Fixed in head, thanks! 

http://www.freebsd.org/cgi/query-pr.cgi?pr=176763 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/176763: commit references a PR
Date: Fri, 22 Nov 2013 19:22:33 +0000 (UTC)

 Author: glebius
 Date: Fri Nov 22 19:22:26 2013
 New Revision: 258480
 URL: http://svnweb.freebsd.org/changeset/base/258480
 
 Log:
   The DIOCKILLSRCNODES operation was implemented with O(m*n) complexity,
   where "m" is number of source nodes and "n" is number of states. Thus,
   on heavy loaded router its processing consumed a lot of CPU time.
   
   Reimplement it with O(m+n) complexity. We first scan through source
   nodes and disconnect matching ones, putting them on the freelist and
   marking with a cookie value in their expire field. Then we scan through
   the states, detecting references to source nodes with a cookie, and
   disconnect them as well. Then the freelist is passed to pf_free_src_nodes().
   
   In collaboration with:	Kajetan Staszkiewicz <kajetan.staszkiewicz innogames.de>
   PR:		kern/176763
   Sponsored by:	InnoGames GmbH
   Sponsored by:	Nginx, Inc.
 
 Modified:
   head/sys/netpfil/pf/pf_ioctl.c
 
 Modified: head/sys/netpfil/pf/pf_ioctl.c
 ==============================================================================
 --- head/sys/netpfil/pf/pf_ioctl.c	Fri Nov 22 19:16:34 2013	(r258479)
 +++ head/sys/netpfil/pf/pf_ioctl.c	Fri Nov 22 19:22:26 2013	(r258480)
 @@ -155,6 +155,7 @@ struct cdev *pf_dev;
  static void		 pf_clear_states(void);
  static int		 pf_clear_tables(void);
  static void		 pf_clear_srcnodes(struct pf_src_node *);
 +static void		 pf_kill_srcnodes(struct pfioc_src_node_kill *);
  static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
  
  /*
 @@ -3143,45 +3144,9 @@ DIOCCHANGEADDR_error:
  		break;
  	}
  
 -	case DIOCKILLSRCNODES: {
 -		struct pfioc_src_node_kill *psnk =
 -		    (struct pfioc_src_node_kill *)addr;
 -		struct pf_srchash	*sh;
 -		struct pf_src_node	*sn;
 -		u_int			i, killed = 0;
 -
 -		for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask;
 -		    i++, sh++) {
 -		    /*
 -		     * XXXGL: we don't ever acquire sources hash lock
 -		     * but if we ever do, the below call to pf_clear_srcnodes()
 -		     * would lead to a LOR.
 -		     */
 -		    PF_HASHROW_LOCK(sh);
 -		    LIST_FOREACH(sn, &sh->nodes, entry)
 -			if (PF_MATCHA(psnk->psnk_src.neg,
 -				&psnk->psnk_src.addr.v.a.addr,
 -				&psnk->psnk_src.addr.v.a.mask,
 -				&sn->addr, sn->af) &&
 -			    PF_MATCHA(psnk->psnk_dst.neg,
 -				&psnk->psnk_dst.addr.v.a.addr,
 -				&psnk->psnk_dst.addr.v.a.mask,
 -				&sn->raddr, sn->af)) {
 -				/* Handle state to src_node linkage */
 -				if (sn->states != 0)
 -					pf_clear_srcnodes(sn);
 -				sn->expire = 1;
 -				killed++;
 -			}
 -		    PF_HASHROW_UNLOCK(sh);
 -		}
 -
 -		if (killed > 0)
 -			pf_purge_expired_src_nodes();
 -
 -		psnk->psnk_killed = killed;
 +	case DIOCKILLSRCNODES:
 +		pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
  		break;
 -	}
  
  	case DIOCSETHOSTID: {
  		u_int32_t	*hostid = (u_int32_t *)addr;
 @@ -3400,6 +3365,59 @@ pf_clear_srcnodes(struct pf_src_node *n)
  		n->states = 0;
  	}
  }
 +
 +static void
 +pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
 +{
 +	struct pf_src_node_list	 kill;
 +
 +	LIST_INIT(&kill);
 +	for (int i = 0; i <= V_pf_srchashmask; i++) {
 +		struct pf_srchash *sh = &V_pf_srchash[i];
 +		struct pf_src_node *sn, *tmp;
 +
 +		PF_HASHROW_LOCK(sh);
 +		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
 +			if (PF_MATCHA(psnk->psnk_src.neg,
 +			      &psnk->psnk_src.addr.v.a.addr,
 +			      &psnk->psnk_src.addr.v.a.mask,
 +			      &sn->addr, sn->af) &&
 +			    PF_MATCHA(psnk->psnk_dst.neg,
 +			      &psnk->psnk_dst.addr.v.a.addr,
 +			      &psnk->psnk_dst.addr.v.a.mask,
 +			      &sn->raddr, sn->af)) {
 +				pf_unlink_src_node_locked(sn);
 +				LIST_INSERT_HEAD(&kill, sn, entry);
 +				sn->expire = 1;
 +			}
 +		PF_HASHROW_UNLOCK(sh);
 +	}
 +
 +	for (int i = 0; i <= V_pf_hashmask; i++) {
 +		struct pf_idhash *ih = &V_pf_idhash[i];
 +		struct pf_state *s;
 +
 +		PF_HASHROW_LOCK(ih);
 +		LIST_FOREACH(s, &ih->states, entry) {
 +			if (s->src_node && s->src_node->expire == 1) {
 +#ifdef INVARIANTS
 +				s->src_node->states--;
 +#endif
 +				s->src_node = NULL;
 +			}
 +			if (s->nat_src_node && s->nat_src_node->expire == 1) {
 +#ifdef INVARIANTS
 +				s->nat_src_node->states--;
 +#endif
 +				s->nat_src_node = NULL;
 +			}
 +		}
 +		PF_HASHROW_UNLOCK(ih);
 +	}
 +
 +	psnk->psnk_killed = pf_free_src_nodes(&kill);
 +}
 +
  /*
   * XXX - Check for version missmatch!!!
   */
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
State-Changed-From-To: patched->closed 
State-Changed-By: glebius 
State-Changed-When: Wed Jan 22 10:29:18 UTC 2014 
State-Changed-Why:  
Merged to 10.0-STABLE. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=176763 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: kern/176763: commit references a PR
Date: Wed, 22 Jan 2014 10:29:24 +0000 (UTC)

 Author: glebius
 Date: Wed Jan 22 10:29:15 2014
 New Revision: 261019
 URL: http://svnweb.freebsd.org/changeset/base/261019
 
 Log:
   Merge r258478, r258479, r258480, r259719: fixes related to mass source
   nodes removal.
   
   PR:		176763
 
 Modified:
   stable/10/sys/net/pfvar.h
   stable/10/sys/netpfil/pf/pf.c
   stable/10/sys/netpfil/pf/pf_ioctl.c
 Directory Properties:
   stable/10/   (props changed)
 
 Modified: stable/10/sys/net/pfvar.h
 ==============================================================================
 --- stable/10/sys/net/pfvar.h	Wed Jan 22 10:18:25 2014	(r261018)
 +++ stable/10/sys/net/pfvar.h	Wed Jan 22 10:29:15 2014	(r261019)
 @@ -1643,8 +1643,9 @@ struct pf_ifspeed {
  #define	DIOCGIFSPEED	_IOWR('D', 92, struct pf_ifspeed)
  
  #ifdef _KERNEL
 +LIST_HEAD(pf_src_node_list, pf_src_node);
  struct pf_srchash {
 -	LIST_HEAD(, pf_src_node)	nodes;
 +	struct pf_src_node_list		nodes;
  	struct mtx			lock;
  };
  
 @@ -1750,8 +1751,11 @@ pf_release_state(struct pf_state *s)
  extern struct pf_state		*pf_find_state_byid(uint64_t, uint32_t);
  extern struct pf_state		*pf_find_state_all(struct pf_state_key_cmp *,
  				    u_int, int *);
 -struct pf_src_node		*pf_find_src_node(struct pf_addr *, struct pf_rule *,
 -				    sa_family_t, int);
 +extern struct pf_src_node	*pf_find_src_node(struct pf_addr *,
 +				    struct pf_rule *, sa_family_t, int);
 +extern void			 pf_unlink_src_node(struct pf_src_node *);
 +extern void			 pf_unlink_src_node_locked(struct pf_src_node *);
 +extern u_int			 pf_free_src_nodes(struct pf_src_node_list *);
  extern void			 pf_print_state(struct pf_state *);
  extern void			 pf_print_flags(u_int8_t);
  extern u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
 
 Modified: stable/10/sys/netpfil/pf/pf.c
 ==============================================================================
 --- stable/10/sys/netpfil/pf/pf.c	Wed Jan 22 10:18:25 2014	(r261018)
 +++ stable/10/sys/netpfil/pf/pf.c	Wed Jan 22 10:29:15 2014	(r261019)
 @@ -673,20 +673,53 @@ pf_insert_src_node(struct pf_src_node **
  	return (0);
  }
  
 -static void
 -pf_remove_src_node(struct pf_src_node *src)
 +void
 +pf_unlink_src_node_locked(struct pf_src_node *src)
  {
 +#ifdef INVARIANTS
  	struct pf_srchash *sh;
  
  	sh = &V_pf_srchash[pf_hashsrc(&src->addr, src->af)];
 -	PF_HASHROW_LOCK(sh);
 +	PF_HASHROW_ASSERT(sh);
 +#endif
  	LIST_REMOVE(src, entry);
 -	PF_HASHROW_UNLOCK(sh);
 -
 +	if (src->rule.ptr)
 +		src->rule.ptr->src_nodes--;
  	V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
  	V_pf_status.src_nodes--;
 +}
  
 -	uma_zfree(V_pf_sources_z, src);
 +void
 +pf_unlink_src_node(struct pf_src_node *src)
 +{
 +	struct pf_srchash *sh;
 +
 +	sh = &V_pf_srchash[pf_hashsrc(&src->addr, src->af)];
 +	PF_HASHROW_LOCK(sh);
 +	pf_unlink_src_node_locked(src);
 +	PF_HASHROW_UNLOCK(sh);
 +}
 +
 +static void
 +pf_free_src_node(struct pf_src_node *sn)
 +{
 +
 +	KASSERT(sn->states == 0, ("%s: %p has refs", __func__, sn));
 +	uma_zfree(V_pf_sources_z, sn);
 +}
 +
 +u_int
 +pf_free_src_nodes(struct pf_src_node_list *head)
 +{
 +	struct pf_src_node *sn, *tmp;
 +	u_int count = 0;
 +
 +	LIST_FOREACH_SAFE(sn, head, entry, tmp) {
 +		pf_free_src_node(sn);
 +		count++;
 +	}
 +
 +	return (count);
  }
  
  /* Data storage structures initialization. */
 @@ -1456,24 +1489,24 @@ pf_state_expires(const struct pf_state *
  void
  pf_purge_expired_src_nodes()
  {
 +	struct pf_src_node_list	 freelist;
  	struct pf_srchash	*sh;
  	struct pf_src_node	*cur, *next;
  	int i;
  
 +	LIST_INIT(&freelist);
  	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
  	    PF_HASHROW_LOCK(sh);
  	    LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
  		if (cur->states == 0 && cur->expire <= time_uptime) {
 -			if (cur->rule.ptr != NULL)
 -				cur->rule.ptr->src_nodes--;
 -			LIST_REMOVE(cur, entry);
 -			V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 -			V_pf_status.src_nodes--;
 -			uma_zfree(V_pf_sources_z, cur);
 +			pf_unlink_src_node_locked(cur);
 +			LIST_INSERT_HEAD(&freelist, cur, entry);
  		} else if (cur->rule.ptr != NULL)
  			cur->rule.ptr->rule_flag |= PFRULE_REFS;
  	    PF_HASHROW_UNLOCK(sh);
  	}
 +
 +	pf_free_src_nodes(&freelist);
  }
  
  static void
 @@ -3609,11 +3642,15 @@ csfailed:
  	if (nk != NULL)
  		uma_zfree(V_pf_state_key_z, nk);
  
 -	if (sn != NULL && sn->states == 0 && sn->expire == 0)
 -		pf_remove_src_node(sn);
 +	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
 +		pf_unlink_src_node(sn);
 +		pf_free_src_node(sn);
 +	}
  
 -	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0)
 -		pf_remove_src_node(nsn);
 +	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
 +		pf_unlink_src_node(nsn);
 +		pf_free_src_node(nsn);
 +	}
  
  	return (PF_DROP);
  }
 
 Modified: stable/10/sys/netpfil/pf/pf_ioctl.c
 ==============================================================================
 --- stable/10/sys/netpfil/pf/pf_ioctl.c	Wed Jan 22 10:18:25 2014	(r261018)
 +++ stable/10/sys/netpfil/pf/pf_ioctl.c	Wed Jan 22 10:29:15 2014	(r261019)
 @@ -151,6 +151,7 @@ struct cdev *pf_dev;
  static void		 pf_clear_states(void);
  static int		 pf_clear_tables(void);
  static void		 pf_clear_srcnodes(struct pf_src_node *);
 +static void		 pf_kill_srcnodes(struct pfioc_src_node_kill *);
  static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
  
  /*
 @@ -3139,45 +3140,9 @@ DIOCCHANGEADDR_error:
  		break;
  	}
  
 -	case DIOCKILLSRCNODES: {
 -		struct pfioc_src_node_kill *psnk =
 -		    (struct pfioc_src_node_kill *)addr;
 -		struct pf_srchash	*sh;
 -		struct pf_src_node	*sn;
 -		u_int			i, killed = 0;
 -
 -		for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask;
 -		    i++, sh++) {
 -		    /*
 -		     * XXXGL: we don't ever acquire sources hash lock
 -		     * but if we ever do, the below call to pf_clear_srcnodes()
 -		     * would lead to a LOR.
 -		     */
 -		    PF_HASHROW_LOCK(sh);
 -		    LIST_FOREACH(sn, &sh->nodes, entry)
 -			if (PF_MATCHA(psnk->psnk_src.neg,
 -				&psnk->psnk_src.addr.v.a.addr,
 -				&psnk->psnk_src.addr.v.a.mask,
 -				&sn->addr, sn->af) &&
 -			    PF_MATCHA(psnk->psnk_dst.neg,
 -				&psnk->psnk_dst.addr.v.a.addr,
 -				&psnk->psnk_dst.addr.v.a.mask,
 -				&sn->raddr, sn->af)) {
 -				/* Handle state to src_node linkage */
 -				if (sn->states != 0)
 -					pf_clear_srcnodes(sn);
 -				sn->expire = 1;
 -				killed++;
 -			}
 -		    PF_HASHROW_UNLOCK(sh);
 -		}
 -
 -		if (killed > 0)
 -			pf_purge_expired_src_nodes();
 -
 -		psnk->psnk_killed = killed;
 +	case DIOCKILLSRCNODES:
 +		pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
  		break;
 -	}
  
  	case DIOCSETHOSTID: {
  		u_int32_t	*hostid = (u_int32_t *)addr;
 @@ -3396,6 +3361,59 @@ pf_clear_srcnodes(struct pf_src_node *n)
  		n->states = 0;
  	}
  }
 +
 +static void
 +pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
 +{
 +	struct pf_src_node_list	 kill;
 +
 +	LIST_INIT(&kill);
 +	for (int i = 0; i <= V_pf_srchashmask; i++) {
 +		struct pf_srchash *sh = &V_pf_srchash[i];
 +		struct pf_src_node *sn, *tmp;
 +
 +		PF_HASHROW_LOCK(sh);
 +		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
 +			if (PF_MATCHA(psnk->psnk_src.neg,
 +			      &psnk->psnk_src.addr.v.a.addr,
 +			      &psnk->psnk_src.addr.v.a.mask,
 +			      &sn->addr, sn->af) &&
 +			    PF_MATCHA(psnk->psnk_dst.neg,
 +			      &psnk->psnk_dst.addr.v.a.addr,
 +			      &psnk->psnk_dst.addr.v.a.mask,
 +			      &sn->raddr, sn->af)) {
 +				pf_unlink_src_node_locked(sn);
 +				LIST_INSERT_HEAD(&kill, sn, entry);
 +				sn->expire = 1;
 +			}
 +		PF_HASHROW_UNLOCK(sh);
 +	}
 +
 +	for (int i = 0; i <= V_pf_hashmask; i++) {
 +		struct pf_idhash *ih = &V_pf_idhash[i];
 +		struct pf_state *s;
 +
 +		PF_HASHROW_LOCK(ih);
 +		LIST_FOREACH(s, &ih->states, entry) {
 +			if (s->src_node && s->src_node->expire == 1) {
 +#ifdef INVARIANTS
 +				s->src_node->states--;
 +#endif
 +				s->src_node = NULL;
 +			}
 +			if (s->nat_src_node && s->nat_src_node->expire == 1) {
 +#ifdef INVARIANTS
 +				s->nat_src_node->states--;
 +#endif
 +				s->nat_src_node = NULL;
 +			}
 +		}
 +		PF_HASHROW_UNLOCK(ih);
 +	}
 +
 +	psnk->psnk_killed = pf_free_src_nodes(&kill);
 +}
 +
  /*
   * XXX - Check for version missmatch!!!
   */
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
>Unformatted:
