/dev permissions after reboot (and panic)

Matthew Dillon dillon at apollo.backplane.com
Sun Jul 17 00:49:04 PDT 2005


:
:I applied the SOCKBUF_DEBUG patch and rebooted.  After the machine came up,
:I tried a MAKEDEV, sync, waited about a minute, then rebooted.  I then got
:this panic:
:
:panic: assertion: sb->sb_mb == m in sbunlinkmbuf
:mp_lock = 00000000; cpuid = 0; lapic.id = 00000000
:boot() called on cpu#0
:Uptime: 3m22s
:
:Matt, as you know my remote console really sucks, so I wasn't able to
:get everything, but I did get a successful dump.  That's uploading to
:leaf right now (*.12).
:
:Hopefully it's useful.
:
:--Peter

    Dump looks good.  The sockbuf has clearly been corrupted.  Not only
    that, but the debug code checked that the sockbuf was valid at 
    the beginning of the sbdrop() call and it was fine.

    This is very encouraging!

    That patch set did not entirely solve the blocking problem with
    m_free().  There are still several places where I am calling m_free()
    without resynchronizing the sockbuf state and I broke a record
    delimiter in soreceive().  Here is a new patch to try.  In this patch
    I defer all m_free() calls to the end of the routine to avoid the
    blocking issue.  It also appears that sbdrop() was being called without
    a critical section.  This should not have caused a problem but
    clearly something has raced the sbdrop() call so I'm adding one for
    good measure.

					-Matt


Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.35
diff -u -r1.35 uipc_socket.c
--- kern/uipc_socket.c	15 Jul 2005 17:54:47 -0000	1.35
+++ kern/uipc_socket.c	17 Jul 2005 07:28:57 -0000
@@ -792,18 +792,18 @@
 	struct mbuf **controlp;
 	int *flagsp;
 {
-	struct mbuf *m, **mp;
+	struct mbuf *m, *n, **mp;
+	struct mbuf *free_chain = NULL;
 	int flags, len, error, offset;
 	struct protosw *pr = so->so_proto;
-	struct mbuf *nextrecord;
 	int moff, type = 0;
 	int orig_resid = uio->uio_resid;
 
 	mp = mp0;
 	if (psa)
-		*psa = 0;
+		*psa = NULL;
 	if (controlp)
-		*controlp = 0;
+		*controlp = NULL;
 	if (flagsp)
 		flags = *flagsp &~ MSG_EOR;
 	else
@@ -826,15 +826,15 @@
 		return (error);
 	}
 	if (mp)
-		*mp = (struct mbuf *)0;
+		*mp = NULL;
 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
 		so_pru_rcvd(so, 0);
 
 restart:
+	crit_enter();
 	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
 	if (error)
-		return (error);
-	crit_enter();
+		goto done;
 
 	m = so->so_rcv.sb_mb;
 	/*
@@ -848,12 +848,12 @@
 	 * we have to do the receive in sections, and thus risk returning
 	 * a short count if a timeout or signal occurs after we start.
 	 */
-	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
+	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
 	    so->so_rcv.sb_cc < uio->uio_resid) &&
 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
 	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
-		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
+		KASSERT(m != NULL || !so->so_rcv.sb_cc, ("receive 1"));
 		if (so->so_error) {
 			if (m)
 				goto dontblock;
@@ -868,11 +868,12 @@
 			else
 				goto release;
 		}
-		for (; m; m = m->m_next)
+		for (; m; m = m->m_next) {
 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
 				m = so->so_rcv.sb_mb;
 				goto dontblock;
 			}
+		}
 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
 		    (pr->pr_flags & PR_CONNREQUIRED)) {
 			error = ENOTCONN;
@@ -886,46 +887,53 @@
 		}
 		sbunlock(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
-		crit_exit();
 		if (error)
-			return (error);
+			goto done;
+		crit_exit();
 		goto restart;
 	}
 dontblock:
 	if (uio->uio_td && uio->uio_td->td_proc)
 		uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
-	nextrecord = m->m_nextpkt;
+
+	/*
+	 * note: m should be == sb_mb here.  Cache the next record while
+	 * cleaning up.  Note that calling m_free*() will break out critical
+	 * section.
+	 */
+	KKASSERT(m == so->so_rcv.sb_mb);
+
+	/*
+	 * Skip any address mbufs prepending the record.
+	 */
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
 		orig_resid = 0;
 		if (psa)
 			*psa = dup_sockaddr(mtod(m, struct sockaddr *));
-		if (flags & MSG_PEEK) {
+		if (flags & MSG_PEEK)
 			m = m->m_next;
-		} else {
-			sbfree(&so->so_rcv, m);
-			m->m_nextpkt = NULL;
-			so->so_rcv.sb_mb = m_free(m);
-			m = so->so_rcv.sb_mb;
-		}
+		else
+			m = sbunlinkmbuf(&so->so_rcv, m, &free_chain);
 	}
+
+	/*
+	 * Skip any control mbufs prepending the record.
+	 */
 #ifdef SCTP
 	if (pr->pr_flags & PR_ADDR_OPT) {
 		/*
 		 * For SCTP we may be getting a
 		 * whole message OR a partial delivery.
 		 */
-		if (m->m_type == MT_SONAME) {
+		if (m && m->m_type == MT_SONAME) {
 			orig_resid = 0;
 			if (psa)
 				*psa = dup_sockaddr(mtod(m, struct sockaddr *));
-			if (flags & MSG_PEEK) {
+			if (flags & MSG_PEEK)
 				m = m->m_next;
-			} else {
-				sbfree(&so->so_rcv, m);
-				so->so_rcv.sb_mb = m_free(m);
-				m = so->so_rcv.sb_mb;
-			}
+			else
+				m = sbunlinkmbuf(&so->so_rcv, m, &free_chain);
 		}
 	}
 #endif /* SCTP */
@@ -933,36 +941,38 @@
 		if (flags & MSG_PEEK) {
 			if (controlp)
 				*controlp = m_copy(m, 0, m->m_len);
-			m = m->m_next;
+			m = m->m_next;	/* XXX race */
 		} else {
-			sbfree(&so->so_rcv, m);
-			m->m_nextpkt = NULL;
 			if (controlp) {
+				n = sbunlinkmbuf(&so->so_rcv, m, NULL);
 				if (pr->pr_domain->dom_externalize &&
 				    mtod(m, struct cmsghdr *)->cmsg_type ==
 				    SCM_RIGHTS)
 				   error = (*pr->pr_domain->dom_externalize)(m);
 				*controlp = m;
-				so->so_rcv.sb_mb = m->m_next;
-				m->m_next = NULL;
-				m = so->so_rcv.sb_mb;
+				m = n;
 			} else {
-				so->so_rcv.sb_mb = m_free(m);
-				m = so->so_rcv.sb_mb;
+				m = sbunlinkmbuf(&so->so_rcv, m, &free_chain);
 			}
 		}
-		if (controlp) {
+		if (controlp && *controlp) {
 			orig_resid = 0;
 			controlp = &(*controlp)->m_next;
 		}
 	}
+
+	/*
+	 * flag OOB data.
+	 */
 	if (m) {
-		if ((flags & MSG_PEEK) == 0)
-			m->m_nextpkt = nextrecord;
 		type = m->m_type;
 		if (type == MT_OOBDATA)
 			flags |= MSG_OOB;
 	}
+
+	/*
+	 * Copy to the UIO or mbuf return chain (*mp).
+	 */
 	moff = 0;
 	offset = 0;
 	while (m && uio->uio_resid > 0 && error == 0) {
@@ -988,14 +998,19 @@
 		 * we must note any additions to the sockbuf when we
 		 * block interrupts again.
 		 */
-		if (mp == 0) {
+		if (mp == NULL) {
 			crit_exit();
 			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
 			crit_enter();
 			if (error)
 				goto release;
-		} else
+		} else {
 			uio->uio_resid -= len;
+		}
+
+		/*
+		 * Eat the entire mbuf or just a piece of it
+		 */
 		if (len == m->m_len - moff) {
 			if (m->m_flags & M_EOR)
 				flags |= MSG_EOR;
@@ -1007,26 +1022,19 @@
 				m = m->m_next;
 				moff = 0;
 			} else {
-				nextrecord = m->m_nextpkt;
-				m->m_nextpkt = NULL;
-				sbfree(&so->so_rcv, m);
 				if (mp) {
+					n = sbunlinkmbuf(&so->so_rcv, m, NULL);
 					*mp = m;
 					mp = &m->m_next;
-					so->so_rcv.sb_mb = m = m->m_next;
-					*mp = (struct mbuf *)0;
+					m = n;
 				} else {
-					so->so_rcv.sb_mb = m = m_free(m);
+					m = sbunlinkmbuf(&so->so_rcv, m, &free_chain);
 				}
-				if (m)
-					m->m_nextpkt = nextrecord;
-				else
-					so->so_rcv.sb_lastmbuf = NULL;
 			}
 		} else {
-			if (flags & MSG_PEEK)
+			if (flags & MSG_PEEK) {
 				moff += len;
-			else {
+			} else {
 				if (mp)
 					*mp = m_copym(m, 0, len, MB_WAIT);
 				m->m_data += len;
@@ -1056,8 +1064,9 @@
 		 * with a short count but without error.
 		 * Keep sockbuf locked against other readers.
 		 */
-		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
-		    !sosendallatonce(so) && !nextrecord) {
+		while (flags & MSG_WAITALL && m == NULL && 
+		    uio->uio_resid > 0 && !sosendallatonce(so) && 
+		    so->so_rcv.sb_mb == NULL) {
 			if (so->so_error || so->so_state & SS_CANTRCVMORE)
 				break;
 			/*
@@ -1071,31 +1080,27 @@
 			error = sbwait(&so->so_rcv);
 			if (error) {
 				sbunlock(&so->so_rcv);
-				crit_exit();
-				return (0);
+				error = 0;
+				goto done;
 			}
 			m = so->so_rcv.sb_mb;
-			if (m)
-				nextrecord = m->m_nextpkt;
 		}
 	}
 
+	/*
+	 * If an atomic read was requested but unread data still remains
+	 * in the record, set MSG_TRUNC.
+	 */
 	if (m && pr->pr_flags & PR_ATOMIC)
 		flags |= MSG_TRUNC;
-	if (!(flags & MSG_PEEK)) {
-		if (m == NULL) {
-			so->so_rcv.sb_mb = nextrecord;
-			so->so_rcv.sb_lastmbuf = NULL;
-		} else {
-			if (pr->pr_flags & PR_ATOMIC)
-				sbdroprecord(&so->so_rcv);
-			else if (m->m_nextpkt == NULL) {
-				KASSERT(so->so_rcv.sb_mb == m,
-				    ("sb_mb %p != m %p", so->so_rcv.sb_mb, m));
-				so->so_rcv.sb_lastrecord = m;
-			}
-		}
-		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+
+	/*
+	 * Cleanup.  If an atomic read was requested drop any unread data.
+	 */
+	if ((flags & MSG_PEEK) == 0) {
+		if (m && (pr->pr_flags & PR_ATOMIC))
+			sbdroprecord(&so->so_rcv);
+		if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
 			so_pru_rcvd(so, flags);
 	}
 
@@ -1110,7 +1115,10 @@
 		*flagsp |= flags;
 release:
 	sbunlock(&so->so_rcv);
+done:
 	crit_exit();
+	if (free_chain)
+		m_freem(free_chain);
 	return (error);
 }
 
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.21
diff -u -r1.21 uipc_socket2.c
--- kern/uipc_socket2.c	7 Jun 2005 19:08:55 -0000	1.21
+++ kern/uipc_socket2.c	17 Jul 2005 07:45:20 -0000
@@ -479,22 +479,21 @@
 {
 	struct mbuf *n;
 
-	if (m == NULL)
-		return;
-	n = sb->sb_mb;
-	if (n) {
-		while (n->m_nextpkt)
-			n = n->m_nextpkt;
-		do {
-			if (n->m_flags & M_EOR) {
-				sbappendrecord(sb, m); /* XXXXXX!!!! */
-				return;
-			}
-		} while (n->m_next && (n = n->m_next));
+	if (m) {
+		n = sb->sb_mb;
+		if (n) {
+			while (n->m_nextpkt)
+				n = n->m_nextpkt;
+			do {
+				if (n->m_flags & M_EOR) {
+					/* XXXXXX!!!! */
+					sbappendrecord(sb, m);
+					return;
+				}
+			} while (n->m_next && (n = n->m_next));
+		}
+		sbcompress(sb, m, n);
 	}
-	sbcompress(sb, m, n);
-	if (n == NULL)
-		sb->sb_lastrecord = sb->sb_mb;
 }
 
 /*
@@ -511,29 +510,53 @@
 }
 
 #ifdef SOCKBUF_DEBUG
+
 void
-sbcheck(sb)
-	struct sockbuf *sb;
+_sbcheck(struct sockbuf *sb)
 {
 	struct mbuf *m;
-	struct mbuf *n = 0;
+	struct mbuf *n = NULL;
 	u_long len = 0, mbcnt = 0;
 
 	for (m = sb->sb_mb; m; m = n) {
 	    n = m->m_nextpkt;
+	    if (n == NULL && sb->sb_lastrecord != m) {
+		    printf("sockbuf %p mismatched lastrecord %p vs %p\n", sb, sb->sb_lastrecord, m);
+		    panic("sbcheck1");
+		
+	    }
 	    for (; m; m = m->m_next) {
 		len += m->m_len;
 		mbcnt += MSIZE;
 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
 			mbcnt += m->m_ext.ext_size;
+		if (n == NULL && m->m_next == NULL) {
+			if (sb->sb_lastmbuf != m) {
+				printf("sockbuf %p mismatched lastmbuf %p vs %p\n", sb, sb->sb_lastmbuf, m);
+				panic("sbcheck2");
+			}
+		}
+	    }
+	}
+	if (sb->sb_mb == NULL) {
+	    if (sb->sb_lastrecord != NULL) {
+		printf("sockbuf %p is empty, lastrecord not NULL: %p\n",
+			sb, sb->sb_lastrecord);
+		panic("sbcheck3");
+	    }
+	    if (sb->sb_lastmbuf != NULL) {
+		printf("sockbuf %p is empty, lastmbuf not NULL: %p\n",
+			sb, sb->sb_lastmbuf);
+		panic("sbcheck4");
 	    }
 	}
 	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
-		printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
-		    mbcnt, sb->sb_mbcnt);
-		panic("sbcheck");
+		printf("sockbuf %p cc %ld != %ld || mbcnt %ld != %ld\n",
+		    sb, len, sb->sb_cc, mbcnt, sb->sb_mbcnt);
+		panic("sbcheck5");
 	}
 }
+
 #endif
 
 /*
@@ -548,6 +571,8 @@
 	if (m0 == NULL)
 		return;
 
+	sbcheck(sb);
+
 	/*
 	 * Break the first mbuf off from the rest of the mbuf chain.
 	 */
@@ -557,13 +582,15 @@
 
 	/*
 	 * Insert the first mbuf of the m0 mbuf chain as the last record of
-	 * the sockbuf.  Note this permits zero length records!
+	 * the sockbuf.  Note this permits zero length records!  Keep the
+	 * sockbuf state consistent.
 	 */
 	if (sb->sb_mb == NULL)
 		sb->sb_mb = firstmbuf;
 	else
 		sb->sb_lastrecord->m_nextpkt = firstmbuf;
 	sb->sb_lastrecord = firstmbuf;	/* update hint for new last record */
+	sb->sb_lastmbuf = firstmbuf;	/* update hint for new last mbuf */
 
 	if ((firstmbuf->m_flags & M_EOR) && (secondmbuf != NULL)) {
 		/* propagate the EOR flag */
@@ -581,6 +608,7 @@
 	sbcompress(sb, secondmbuf, firstmbuf);
 }
 
+#if 0
 /*
  * As above except that OOB data is inserted at the beginning of the sockbuf,
  * but after any other OOB data.
@@ -591,7 +619,7 @@
 	struct mbuf *m;
 	struct mbuf **mp;
 
-	if (m0 == 0)
+	if (m0 == NULL)
 		return;
 	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
 	    m = *mp;
@@ -619,13 +647,14 @@
 		sb->sb_lastrecord = m0;
 
 	m = m0->m_next;
-	m0->m_next = 0;
+	m0->m_next = NULL;
 	if (m && (m0->m_flags & M_EOR)) {
 		m0->m_flags &= ~M_EOR;
 		m->m_flags |= M_EOR;
 	}
 	sbcompress(sb, m, m0);
 }
+#endif
 
 /*
  * Append address and data, and optionally, control (ancillary) data
@@ -644,6 +673,7 @@
 
 	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
 		panic("sbappendaddr");
+	sbcheck(sb);
 
 	if (m0)
 		space += m0->m_pkthdr.len;
@@ -657,8 +687,9 @@
 	if (asa->sa_len > MLEN)
 		return (0);
 	MGET(m, MB_DONTWAIT, MT_SONAME);
-	if (m == 0)
+	if (m == NULL)
 		return (0);
+	KKASSERT(m->m_nextpkt == NULL);
 	m->m_len = asa->sa_len;
 	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
 	if (n)
@@ -674,6 +705,9 @@
 	else
 		sb->sb_lastrecord->m_nextpkt = m;
 	sb->sb_lastrecord = m;
+	while (m->m_next)
+		m = m->m_next;
+	sb->sb_lastmbuf = m;
 
 	return (1);
 }
@@ -689,6 +723,8 @@
 	u_int length, cmbcnt, m0mbcnt;
 
 	KASSERT(control != NULL, ("sbappendcontrol"));
+	KKASSERT(control->m_nextpkt == NULL);
+	sbcheck(sb);
 
 	length = m_countm(control, &n, &cmbcnt) + m_countm(m0, NULL, &m0mbcnt);
 	if (length > sbspace(sb))
@@ -701,6 +737,7 @@
 	else
 		sb->sb_lastrecord->m_nextpkt = control;
 	sb->sb_lastrecord = control;
+	sb->sb_lastmbuf = m0;
 
 	sb->sb_cc += length;
 	sb->sb_mbcnt += cmbcnt + m0mbcnt;
@@ -717,7 +754,9 @@
 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *tailm)
 {
 	int eor = 0;
+	struct mbuf *free_chain = NULL;
 
+	sbcheck(sb);
 	while (m) {
 		struct mbuf *o;
 
@@ -726,12 +765,18 @@
 		 * Disregard empty mbufs as long as we don't encounter
 		 * an end-of-record or there is a trailing mbuf of
 		 * the same type to propagate the EOR flag to.
+		 *
+		 * Defer the m_free() call because it can block and break
+		 * the atomicy of the sockbuf.
 		 */
 		if (m->m_len == 0 &&
 		    (eor == 0 ||
 		     (((o = m->m_next) || (o = tailm)) &&
 		      o->m_type == m->m_type))) {
-			m = m_free(m);
+			o = m->m_next;
+			m->m_next = free_chain;
+			free_chain = m;
+			m = o;
 			continue;
 		}
 
@@ -745,7 +790,10 @@
 			      (unsigned)m->m_len);
 			tailm->m_len += m->m_len;
 			sb->sb_cc += m->m_len;		/* update sb counter */
-			m = m_free(m);
+			o = m->m_next;
+			m->m_next = free_chain;
+			free_chain = m;
+			m = o;
 			continue;
 		}
 
@@ -753,7 +801,8 @@
 		if (tailm == NULL) {
 			KASSERT(sb->sb_mb == NULL,
 				("sbcompress: sb_mb not NULL"));
-			sb->sb_mb = m;		/* put at front of sockbuf */
+			sb->sb_mb = m;		/* only mbuf in sockbuf */
+			sb->sb_lastrecord = m;	/* new last record */
 		} else {
 			tailm->m_next = m;	/* tack m on following tailm */
 		}
@@ -770,12 +819,23 @@
 		tailm->m_flags &= ~M_EOR;
 	}
 
+	/*
+	 * Propogate EOR to the last mbuf
+	 */
 	if (eor) {
 		if (tailm)
-			tailm->m_flags |= eor;	/* propagate EOR to last mbuf */
+			tailm->m_flags |= eor;
 		else
 			printf("semi-panic: sbcompress");
 	}
+
+	/*
+	 * Clean up any defered frees.
+	 */
+	while (free_chain)
+		free_chain = m_free(free_chain);
+
+	sbcheck(sb);
 }
 
 /*
@@ -812,19 +872,16 @@
 	int len;
 {
 	struct mbuf *m;
-	struct mbuf *nextpkt;
+	struct mbuf *free_chain = NULL;
+
+	sbcheck(sb);
+	crit_enter();
 
+	/*
+	 * Remove mbufs from multiple records until the count is exhausted.
+	 */
 	m = sb->sb_mb;
-	nextpkt = (m != NULL) ? m->m_nextpkt : NULL;
-	while (len > 0) {
-		if (m == NULL) {
-			if (nextpkt == NULL)
-				panic("sbdrop");
-			m = nextpkt;
-			nextpkt = m->m_nextpkt;
-			m->m_nextpkt = NULL;
-			continue;
-		}
+	while (m && len > 0) {
 		if (m->m_len > len) {
 			m->m_len -= len;
 			m->m_data += len;
@@ -832,41 +889,94 @@
 			break;
 		}
 		len -= m->m_len;
-		sbfree(sb, m);
-		m = m_free(m);
+		m = sbunlinkmbuf(sb, m, &free_chain);
+		if (m == NULL && len)
+			m = sb->sb_mb;
 	}
+
+	/*
+	 * Remove any trailing 0-length mbufs in the current record.  If
+	 * the last record for which data was removed is now empty, m will be
+	 * NULL.
+	 */
 	while (m && m->m_len == 0) {
-		sbfree(sb, m);
-		m = m_free(m);
-	}
-	if (m != NULL) {
-		sb->sb_mb = m;
-		m->m_nextpkt = nextpkt;
-	} else {
-		sb->sb_mb = nextpkt;
-		sb->sb_lastmbuf = NULL;		/* invalidate hint */
+		m = sbunlinkmbuf(sb, m, &free_chain);
 	}
+	crit_exit();
+	if (free_chain)
+		m_freem(free_chain);
+	sbcheck(sb);
 }
 
 /*
- * Drop a record off the front of a sockbuf
- * and move the next record to the front.
+ * Drop a record off the front of a sockbuf and move the next record
+ * to the front.
+ *
+ * Must be called while holding a critical section.
  */
 void
 sbdroprecord(sb)
 	struct sockbuf *sb;
 {
 	struct mbuf *m;
+	struct mbuf *n;
 
+	sbcheck(sb);
 	m = sb->sb_mb;
 	if (m) {
-		sb->sb_mb = m->m_nextpkt;
+		if ((sb->sb_mb = m->m_nextpkt) == NULL) {
+			sb->sb_lastrecord = NULL;
+			sb->sb_lastmbuf = NULL;
+		}
 		m->m_nextpkt = NULL;
-		do {
-			sbfree(sb, m);
-			m = m_free(m);
-		} while (m);
+		for (n = m; n; n = n->m_next)
+			sbfree(sb, n);
+		m_freem(m);
+		sbcheck(sb);
+	}
+}
+
+/*
+ * Drop the first mbuf off the sockbuf and move the next mbuf to the front.
+ * Currently only the head mbuf of the sockbuf may be dropped this way.
+ *
+ * The next mbuf in the same record as the mbuf being removed is returned
+ * or NULL if the record is exhausted.  Note that other records may remain
+ * in the sockbuf when NULL is returned.
+ *
+ * Must be called while holding a critical section.
+ */
+struct mbuf *
+sbunlinkmbuf(struct sockbuf *sb, struct mbuf *m, struct mbuf **free_chain)
+{
+	struct mbuf *n;
+
+	KKASSERT(sb->sb_mb == m);
+	sbfree(sb, m);
+	n = m->m_next;
+	if (n) {
+		sb->sb_mb = n;
+		if (sb->sb_lastrecord == m)
+			sb->sb_lastrecord = n;
+		KKASSERT(sb->sb_lastmbuf != m);
+		n->m_nextpkt = m->m_nextpkt;
+	} else {
+		sb->sb_mb = m->m_nextpkt;
+		if (sb->sb_lastrecord == m) {
+			KKASSERT(sb->sb_mb == NULL);
+			sb->sb_lastrecord = NULL;
+		}
+		if (sb->sb_mb == NULL)
+			sb->sb_lastmbuf = NULL;
+	}
+	m->m_nextpkt = NULL;
+	if (free_chain) {
+		m->m_next = *free_chain;
+		*free_chain = m;
+	} else {
+		m->m_next = NULL;
 	}
+	return(n);
 }
 
 /*
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.19
diff -u -r1.19 socketvar.h
--- sys/socketvar.h	13 Jul 2005 01:38:53 -0000	1.19
+++ sys/socketvar.h	17 Jul 2005 07:21:24 -0000
@@ -180,6 +180,12 @@
  * Macros for sockets and socket buffering.
  */
 
+#ifdef SOCKBUF_DEBUG
+#define sbcheck(sb)	_sbcheck(sb)
+#else
+#define sbcheck(sb)
+#endif
+
 /*
  * Do we need to notify the other side when I/O is possible?
  */
@@ -337,12 +343,14 @@
 	    struct mbuf *control);
 void	sbappendrecord (struct sockbuf *sb, struct mbuf *m0);
 void	sbappendstream (struct sockbuf *sb, struct mbuf *m);
-void	sbcheck (struct sockbuf *sb);
+void	_sbcheck (struct sockbuf *sb);
 void	sbcompress (struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
 struct mbuf *
 	sbcreatecontrol (caddr_t p, int size, int type, int level);
 void	sbdrop (struct sockbuf *sb, int len);
 void	sbdroprecord (struct sockbuf *sb);
+struct mbuf *
+	sbunlinkmbuf (struct sockbuf *, struct mbuf *, struct mbuf **);
 void	sbflush (struct sockbuf *sb);
 void	sbinsertoob (struct sockbuf *sb, struct mbuf *m0);
 void	sbrelease (struct sockbuf *sb, struct socket *so);





More information about the Bugs mailing list