panic in pf_purge_expired_states

Matthew Dillon dillon at apollo.backplane.com
Tue Jul 4 10:21:28 PDT 2006


:Max Laier wrote:
:> On Sunday 02 July 2006 17:17, Simon 'corecode' Schubert wrote:
:>> about every month i'm getting a panic in pf_purge_expired_states on some
:>> RB_* function.
:> Can you show me a trace instead?
:
:sure (panics not always here):
:
:#10 0xc578bc7f in pf_state_tree_lan_ext_RB_REMOVE (head=0xc56da9c0, 
:elm=0xc593a900)
:     at /usr/src/sys/net/pf/pf.c:272
:#11 0xc578df19 in pf_purge_expired_states () at /usr/src/sys/net/pf/pf.c:814
:#12 0xc578db91 in pf_purge_timeout (arg=0xc57aeaa8) at 
:...

    Well, I see two problems immediately.  It is calling pf_send_tcp()
    before removing the item from the tree, which can trivally block
    and break the critical section, and it is also calculating the 'next'
    RB entry in the loop as well.  Well, if pf_send_tcp() blocks then
    the next RB entry can easily be ripped out from under the scan.

    The solution is to (1) use DragonFly's RB_SCAN function, which safely
    handles any ripouts that occur during the loop as long as the loop
    is interlocked (which it is with a critical section), and (2) I'm
    guessing we have to remove the entry from the various trees *BEFORE*
    we call pf_send_tcp().

    I am absolutely *NOT* an expert on PF, and I could only compile-test
    this patch, not actually try it, so this patch might not work, or it
    might work and not actually solve the problem.

						-Matt


Index: pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf/pf.c,v
retrieving revision 1.8
diff -u -r1.8 pf.c
--- pf.c	11 Dec 2005 13:00:17 -0000	1.8
+++ pf.c	4 Jul 2006 17:14:59 -0000
@@ -793,51 +793,54 @@
 	s->src_node = s->nat_src_node = NULL;
 }
 
-void
-pf_purge_expired_states(void)
+static int
+pf_purge_expired_states_callback(struct pf_state *cur, void *data __unused)
 {
-	struct pf_state		*cur, *next;
-
-	for (cur = RB_MIN(pf_state_tree_id, &tree_id);
-	    cur; cur = next) {
-		next = RB_NEXT(pf_state_tree_id, &tree_id, cur);
-
-		if (pf_state_expires(cur) <= time_second) {
-			if (cur->src.state == PF_TCPS_PROXY_DST)
-				pf_send_tcp(cur->rule.ptr, cur->af,
-				    &cur->ext.addr, &cur->lan.addr,
-				    cur->ext.port, cur->lan.port,
-				    cur->src.seqhi, cur->src.seqlo + 1, 0,
-				    TH_RST|TH_ACK, 0, 0);
-			RB_REMOVE(pf_state_tree_ext_gwy,
-			    &cur->u.s.kif->pfik_ext_gwy, cur);
-			RB_REMOVE(pf_state_tree_lan_ext,
-			    &cur->u.s.kif->pfik_lan_ext, cur);
-			RB_REMOVE(pf_state_tree_id, &tree_id, cur);
+	if (pf_state_expires(cur) <= time_second) {
+		RB_REMOVE(pf_state_tree_ext_gwy,
+		    &cur->u.s.kif->pfik_ext_gwy, cur);
+		RB_REMOVE(pf_state_tree_lan_ext,
+		    &cur->u.s.kif->pfik_lan_ext, cur);
+		RB_REMOVE(pf_state_tree_id, &tree_id, cur);
+		if (cur->src.state == PF_TCPS_PROXY_DST) {
+			pf_send_tcp(cur->rule.ptr, cur->af,
+			    &cur->ext.addr, &cur->lan.addr,
+			    cur->ext.port, cur->lan.port,
+			    cur->src.seqhi, cur->src.seqlo + 1, 0,
+			    TH_RST|TH_ACK, 0, 0);
+		}
 #if NPFSYNC
-			pfsync_delete_state(cur);
+		pfsync_delete_state(cur);
 #endif
-			pf_src_tree_remove_state(cur);
-			if (--cur->rule.ptr->states <= 0 &&
-			    cur->rule.ptr->src_nodes <= 0)
-				pf_rm_rule(NULL, cur->rule.ptr);
-			if (cur->nat_rule.ptr != NULL)
-				if (--cur->nat_rule.ptr->states <= 0 &&
-					cur->nat_rule.ptr->src_nodes <= 0)
-					pf_rm_rule(NULL, cur->nat_rule.ptr);
-			if (cur->anchor.ptr != NULL)
-				if (--cur->anchor.ptr->states <= 0)
-					pf_rm_rule(NULL, cur->anchor.ptr);
-			pf_normalize_tcp_cleanup(cur);
-			pfi_detach_state(cur->u.s.kif);
-			TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
-			pool_put(&pf_state_pl, cur);
-			pf_status.fcounters[FCNT_STATE_REMOVALS]++;
-			pf_status.states--;
-		}
+		pf_src_tree_remove_state(cur);
+		if (--cur->rule.ptr->states <= 0 &&
+		    cur->rule.ptr->src_nodes <= 0)
+			pf_rm_rule(NULL, cur->rule.ptr);
+		if (cur->nat_rule.ptr != NULL)
+			if (--cur->nat_rule.ptr->states <= 0 &&
+				cur->nat_rule.ptr->src_nodes <= 0)
+				pf_rm_rule(NULL, cur->nat_rule.ptr);
+		if (cur->anchor.ptr != NULL)
+			if (--cur->anchor.ptr->states <= 0)
+				pf_rm_rule(NULL, cur->anchor.ptr);
+		pf_normalize_tcp_cleanup(cur);
+		pfi_detach_state(cur->u.s.kif);
+		TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
+		pool_put(&pf_state_pl, cur);
+		pf_status.fcounters[FCNT_STATE_REMOVALS]++;
+		pf_status.states--;
 	}
+	return(0);
 }
 
+void
+pf_purge_expired_states(void)
+{
+	RB_SCAN(pf_state_tree_id, &tree_id, NULL,
+		pf_purge_expired_states_callback, NULL);
+}
+
+
 int
 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
 {
Index: pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf/pf.c,v
retrieving revision 1.8
diff -u -r1.8 pf.c
--- pf.c	11 Dec 2005 13:00:17 -0000	1.8
+++ pf.c	4 Jul 2006 17:14:59 -0000
@@ -793,51 +793,54 @@
 	s->src_node = s->nat_src_node = NULL;
 }
 
-void
-pf_purge_expired_states(void)
+static int
+pf_purge_expired_states_callback(struct pf_state *cur, void *data __unused)
 {
-	struct pf_state		*cur, *next;
-
-	for (cur = RB_MIN(pf_state_tree_id, &tree_id);
-	    cur; cur = next) {
-		next = RB_NEXT(pf_state_tree_id, &tree_id, cur);
-
-		if (pf_state_expires(cur) <= time_second) {
-			if (cur->src.state == PF_TCPS_PROXY_DST)
-				pf_send_tcp(cur->rule.ptr, cur->af,
-				    &cur->ext.addr, &cur->lan.addr,
-				    cur->ext.port, cur->lan.port,
-				    cur->src.seqhi, cur->src.seqlo + 1, 0,
-				    TH_RST|TH_ACK, 0, 0);
-			RB_REMOVE(pf_state_tree_ext_gwy,
-			    &cur->u.s.kif->pfik_ext_gwy, cur);
-			RB_REMOVE(pf_state_tree_lan_ext,
-			    &cur->u.s.kif->pfik_lan_ext, cur);
-			RB_REMOVE(pf_state_tree_id, &tree_id, cur);
+	if (pf_state_expires(cur) <= time_second) {
+		RB_REMOVE(pf_state_tree_ext_gwy,
+		    &cur->u.s.kif->pfik_ext_gwy, cur);
+		RB_REMOVE(pf_state_tree_lan_ext,
+		    &cur->u.s.kif->pfik_lan_ext, cur);
+		RB_REMOVE(pf_state_tree_id, &tree_id, cur);
+		if (cur->src.state == PF_TCPS_PROXY_DST) {
+			pf_send_tcp(cur->rule.ptr, cur->af,
+			    &cur->ext.addr, &cur->lan.addr,
+			    cur->ext.port, cur->lan.port,
+			    cur->src.seqhi, cur->src.seqlo + 1, 0,
+			    TH_RST|TH_ACK, 0, 0);
+		}
 #if NPFSYNC
-			pfsync_delete_state(cur);
+		pfsync_delete_state(cur);
 #endif
-			pf_src_tree_remove_state(cur);
-			if (--cur->rule.ptr->states <= 0 &&
-			    cur->rule.ptr->src_nodes <= 0)
-				pf_rm_rule(NULL, cur->rule.ptr);
-			if (cur->nat_rule.ptr != NULL)
-				if (--cur->nat_rule.ptr->states <= 0 &&
-					cur->nat_rule.ptr->src_nodes <= 0)
-					pf_rm_rule(NULL, cur->nat_rule.ptr);
-			if (cur->anchor.ptr != NULL)
-				if (--cur->anchor.ptr->states <= 0)
-					pf_rm_rule(NULL, cur->anchor.ptr);
-			pf_normalize_tcp_cleanup(cur);
-			pfi_detach_state(cur->u.s.kif);
-			TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
-			pool_put(&pf_state_pl, cur);
-			pf_status.fcounters[FCNT_STATE_REMOVALS]++;
-			pf_status.states--;
-		}
+		pf_src_tree_remove_state(cur);
+		if (--cur->rule.ptr->states <= 0 &&
+		    cur->rule.ptr->src_nodes <= 0)
+			pf_rm_rule(NULL, cur->rule.ptr);
+		if (cur->nat_rule.ptr != NULL)
+			if (--cur->nat_rule.ptr->states <= 0 &&
+				cur->nat_rule.ptr->src_nodes <= 0)
+				pf_rm_rule(NULL, cur->nat_rule.ptr);
+		if (cur->anchor.ptr != NULL)
+			if (--cur->anchor.ptr->states <= 0)
+				pf_rm_rule(NULL, cur->anchor.ptr);
+		pf_normalize_tcp_cleanup(cur);
+		pfi_detach_state(cur->u.s.kif);
+		TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
+		pool_put(&pf_state_pl, cur);
+		pf_status.fcounters[FCNT_STATE_REMOVALS]++;
+		pf_status.states--;
 	}
+	return(0);
 }
 
+void
+pf_purge_expired_states(void)
+{
+	RB_SCAN(pf_state_tree_id, &tree_id, NULL,
+		pf_purge_expired_states_callback, NULL);
+}
+
+
 int
 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
 {





More information about the Bugs mailing list