netif/dc issues
Matthew Dillon
dillon at apollo.backplane.com
Sat Jul 16 10:18:24 PDT 2005
:On -Preview and HEAD
:my Davicom 9102 10/100 cards
:are choking out these messages
:
:...
:mfree: m->m_nextpkt != NULL
:Trace beginning at frame (0xdeadbeef)
:m_free (..) at m_free+0x5e
:m_free (..) at m_free+0x5e
:m_freem (..) at m_freem+0x2e
:dc_start (..) at dc_start+0xd4
:dc_tick (...) at dc_tick+0x148
:softclock_handler (...) at softclock_handler+0xe2
:lwkt_exit() lwkt_exit
:...
:If i get that while on my 2-port KVM switch it locks up the keyboard
:which then needs a hard reset.
:
:In hopes of fixing it, I brought in changes from freebsd (see submit@)
:but still no luck with the updates
:
:The card works fine on NetBSD, OpenBSD, QNX, Linux
:
:--ed
:Eduardo Tongson
I have a tentative patch set that fixes an interlock/blocking issue
with sockbufs. I do not know if this will help your situation but
it's worth trying.
Note that YONETANI Tomokazu reports he gets a panic when he shutsdown
a system with this patch and the debugging enabled, so the patch may
not yet solve all the issues. I expect the patch will go through a
few more revisions before it goes into the tree.
The main purpose of the patch is to fix issues that occur on SMP systems.
The m_nextpkt field is known to not always be properly cleared, but
it's odd that it would occur in the dc_start path.
-Matt
Matthew Dillon
<dillon at xxxxxxxxxxxxx>
Index: conf/options
===================================================================
RCS file: /cvs/src/sys/conf/options,v
retrieving revision 1.34
diff -u -r1.34 options
--- conf/options 15 Jul 2005 17:54:47 -0000 1.34
+++ conf/options 16 Jul 2005 16:46:00 -0000
@@ -413,6 +413,7 @@
SIMPLELOCK_DEBUG opt_global.h
VFS_BIO_DEBUG opt_global.h
DEBUG_INTERRUPTS opt_global.h
+SOCKBUF_DEBUG opt_global.h
# These are VM related options
VM_KMEM_SIZE opt_vm.h
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.35
diff -u -r1.35 uipc_socket.c
--- kern/uipc_socket.c 15 Jul 2005 17:54:47 -0000 1.35
+++ kern/uipc_socket.c 16 Jul 2005 16:48:49 -0000
@@ -792,10 +792,9 @@
struct mbuf **controlp;
int *flagsp;
{
- struct mbuf *m, **mp;
+ struct mbuf *m, *n, **mp;
int flags, len, error, offset;
struct protosw *pr = so->so_proto;
- struct mbuf *nextrecord;
int moff, type = 0;
int orig_resid = uio->uio_resid;
@@ -848,12 +847,12 @@
* we have to do the receive in sections, and thus risk returning
* a short count if a timeout or signal occurs after we start.
*/
- if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
+ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
so->so_rcv.sb_cc < uio->uio_resid) &&
(so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
- KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
+ KASSERT(m != NULL || !so->so_rcv.sb_cc, ("receive 1"));
if (so->so_error) {
if (m)
goto dontblock;
@@ -868,11 +867,12 @@
else
goto release;
}
- for (; m; m = m->m_next)
+ for (; m; m = m->m_next) {
if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
m = so->so_rcv.sb_mb;
goto dontblock;
}
+ }
if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
(pr->pr_flags & PR_CONNREQUIRED)) {
error = ENOTCONN;
@@ -894,7 +894,17 @@
dontblock:
if (uio->uio_td && uio->uio_td->td_proc)
uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
- nextrecord = m->m_nextpkt;
+
+ /*
+ * note: m should be == sb_mb here. Cache the next record while
+ * cleaning up. Note that calling m_free*() will break out critical
+ * section.
+ */
+ KKASSERT(m == so->so_rcv.sb_mb);
+
+ /*
+ * Skip any address mbufs prepending the record.
+ */
if (pr->pr_flags & PR_ADDR) {
KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
orig_resid = 0;
@@ -903,12 +913,15 @@
if (flags & MSG_PEEK) {
m = m->m_next;
} else {
- sbfree(&so->so_rcv, m);
- m->m_nextpkt = NULL;
- so->so_rcv.sb_mb = m_free(m);
- m = so->so_rcv.sb_mb;
+ n = sbunlinkmbuf(&so->so_rcv, m);
+ m_free(m);
+ m = n;
}
}
+
+ /*
+ * Skip any control mbufs prepending the record.
+ */
#ifdef SCTP
if (pr->pr_flags & PR_ADDR_OPT) {
/*
@@ -922,9 +935,9 @@
if (flags & MSG_PEEK) {
m = m->m_next;
} else {
- sbfree(&so->so_rcv, m);
- so->so_rcv.sb_mb = m_free(m);
- m = so->so_rcv.sb_mb;
+ n = sbunlinkmbuf(&so->so_rcv, m);
+ m_free(m);
+ m = n;
}
}
}
@@ -935,34 +948,36 @@
*controlp = m_copy(m, 0, m->m_len);
m = m->m_next;
} else {
- sbfree(&so->so_rcv, m);
- m->m_nextpkt = NULL;
+ n = sbunlinkmbuf(&so->so_rcv, m);
if (controlp) {
if (pr->pr_domain->dom_externalize &&
mtod(m, struct cmsghdr *)->cmsg_type ==
SCM_RIGHTS)
error = (*pr->pr_domain->dom_externalize)(m);
*controlp = m;
- so->so_rcv.sb_mb = m->m_next;
- m->m_next = NULL;
- m = so->so_rcv.sb_mb;
} else {
- so->so_rcv.sb_mb = m_free(m);
- m = so->so_rcv.sb_mb;
+ m_free(m);
}
+ m = n;
}
if (controlp) {
orig_resid = 0;
controlp = &(*controlp)->m_next;
}
}
+
+ /*
+ * flag OOB data.
+ */
if (m) {
- if ((flags & MSG_PEEK) == 0)
- m->m_nextpkt = nextrecord;
type = m->m_type;
if (type == MT_OOBDATA)
flags |= MSG_OOB;
}
+
+ /*
+ * Copy to the UIO or mbuf return chain (*mp).
+ */
moff = 0;
offset = 0;
while (m && uio->uio_resid > 0 && error == 0) {
@@ -988,14 +1003,19 @@
* we must note any additions to the sockbuf when we
* block interrupts again.
*/
- if (mp == 0) {
+ if (mp == NULL) {
crit_exit();
error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
crit_enter();
if (error)
goto release;
- } else
+ } else {
uio->uio_resid -= len;
+ }
+
+ /*
+ * Eat the entire mbuf or just a piece of it
+ */
if (len == m->m_len - moff) {
if (m->m_flags & M_EOR)
flags |= MSG_EOR;
@@ -1007,26 +1027,19 @@
m = m->m_next;
moff = 0;
} else {
- nextrecord = m->m_nextpkt;
- m->m_nextpkt = NULL;
- sbfree(&so->so_rcv, m);
+ n = sbunlinkmbuf(&so->so_rcv, m);
if (mp) {
*mp = m;
mp = &m->m_next;
- so->so_rcv.sb_mb = m = m->m_next;
- *mp = (struct mbuf *)0;
} else {
- so->so_rcv.sb_mb = m = m_free(m);
+ m_free(m);
}
- if (m)
- m->m_nextpkt = nextrecord;
- else
- so->so_rcv.sb_lastmbuf = NULL;
+ m = n;
}
} else {
- if (flags & MSG_PEEK)
+ if (flags & MSG_PEEK) {
moff += len;
- else {
+ } else {
if (mp)
*mp = m_copym(m, 0, len, MB_WAIT);
m->m_data += len;
@@ -1056,8 +1069,9 @@
* with a short count but without error.
* Keep sockbuf locked against other readers.
*/
- while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
- !sosendallatonce(so) && !nextrecord) {
+ while (flags & MSG_WAITALL && m == NULL &&
+ uio->uio_resid > 0 && !sosendallatonce(so) &&
+ so->so_rcv.sb_mb == NULL) {
if (so->so_error || so->so_state & SS_CANTRCVMORE)
break;
/*
@@ -1075,27 +1089,23 @@
return (0);
}
m = so->so_rcv.sb_mb;
- if (m)
- nextrecord = m->m_nextpkt;
}
}
+ /*
+ * If an atomic read was requested but unread data still remains
+ * in the record, set MSG_TRUNC.
+ */
if (m && pr->pr_flags & PR_ATOMIC)
flags |= MSG_TRUNC;
- if (!(flags & MSG_PEEK)) {
- if (m == NULL) {
- so->so_rcv.sb_mb = nextrecord;
- so->so_rcv.sb_lastmbuf = NULL;
- } else {
- if (pr->pr_flags & PR_ATOMIC)
- sbdroprecord(&so->so_rcv);
- else if (m->m_nextpkt == NULL) {
- KASSERT(so->so_rcv.sb_mb == m,
- ("sb_mb %p != m %p", so->so_rcv.sb_mb, m));
- so->so_rcv.sb_lastrecord = m;
- }
- }
- if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+
+ /*
+ * Cleanup. If an atomic read was requested drop any unread data.
+ */
+ if ((flags & MSG_PEEK) == 0) {
+ if (m && (pr->pr_flags & PR_ATOMIC))
+ sbdroprecord(&so->so_rcv);
+ if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
so_pru_rcvd(so, flags);
}
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.21
diff -u -r1.21 uipc_socket2.c
--- kern/uipc_socket2.c 7 Jun 2005 19:08:55 -0000 1.21
+++ kern/uipc_socket2.c 14 Jul 2005 17:08:01 -0000
@@ -479,22 +479,21 @@
{
struct mbuf *n;
- if (m == NULL)
- return;
- n = sb->sb_mb;
- if (n) {
- while (n->m_nextpkt)
- n = n->m_nextpkt;
- do {
- if (n->m_flags & M_EOR) {
- sbappendrecord(sb, m); /* XXXXXX!!!! */
- return;
- }
- } while (n->m_next && (n = n->m_next));
+ if (m) {
+ n = sb->sb_mb;
+ if (n) {
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ do {
+ if (n->m_flags & M_EOR) {
+ /* XXXXXX!!!! */
+ sbappendrecord(sb, m);
+ return;
+ }
+ } while (n->m_next && (n = n->m_next));
+ }
+ sbcompress(sb, m, n);
}
- sbcompress(sb, m, n);
- if (n == NULL)
- sb->sb_lastrecord = sb->sb_mb;
}
/*
@@ -511,9 +510,9 @@
}
#ifdef SOCKBUF_DEBUG
+
void
-sbcheck(sb)
- struct sockbuf *sb;
+_sbcheck(struct sockbuf *sb)
{
struct mbuf *m;
struct mbuf *n = 0;
@@ -521,19 +520,43 @@
for (m = sb->sb_mb; m; m = n) {
n = m->m_nextpkt;
+ if (n == NULL && sb->sb_lastrecord != m) {
+ printf("sockbuf %p mismatched lastrecord %p vs %p\n", sb->sb_lastrecord, m);
+ panic("sbcheck1");
+
+ }
for (; m; m = m->m_next) {
len += m->m_len;
mbcnt += MSIZE;
if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
mbcnt += m->m_ext.ext_size;
+ if (n == NULL && m->m_next == NULL) {
+ if (sb->sb_lastmbuf != m) {
+ printf("sockbuf %p mismatched lastmbuf %p vs %p\n", sb->sb_lastmbuf, m);
+ panic("sbcheck2");
+ }
+ }
+ }
+ }
+ if (sb->sb_mb == NULL) {
+ if (sb->sb_lastrecord != NULL) {
+ printf("sockbuf %p is empty, lastrecord not NULL: %p\n",
+ sb, sb->sb_lastrecord);
+ panic("sbcheck3");
+ }
+ if (sb->sb_lastmbuf != NULL) {
+ printf("sockbuf %p is empty, lastmbuf not NULL: %p\n",
+ sb, sb->sb_lastmbuf);
+ panic("sbcheck4");
}
}
if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
- printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
- mbcnt, sb->sb_mbcnt);
- panic("sbcheck");
+ printf("sockbuf %p cc %ld != %ld || mbcnt %ld != %ld\n",
+ sb, len, sb->sb_cc, mbcnt, sb->sb_mbcnt);
+ panic("sbcheck5");
}
}
+
#endif
/*
@@ -548,6 +571,8 @@
if (m0 == NULL)
return;
+ sbcheck(sb);
+
/*
* Break the first mbuf off from the rest of the mbuf chain.
*/
@@ -557,13 +582,15 @@
/*
* Insert the first mbuf of the m0 mbuf chain as the last record of
- * the sockbuf. Note this permits zero length records!
+ * the sockbuf. Note this permits zero length records! Keep the
+ * sockbuf state consistent.
*/
if (sb->sb_mb == NULL)
sb->sb_mb = firstmbuf;
else
sb->sb_lastrecord->m_nextpkt = firstmbuf;
sb->sb_lastrecord = firstmbuf; /* update hint for new last record */
+ sb->sb_lastmbuf = firstmbuf; /* update hint for new last mbuf */
if ((firstmbuf->m_flags & M_EOR) && (secondmbuf != NULL)) {
/* propagate the EOR flag */
@@ -581,6 +608,7 @@
sbcompress(sb, secondmbuf, firstmbuf);
}
+#if 0
/*
* As above except that OOB data is inserted at the beginning of the sockbuf,
* but after any other OOB data.
@@ -591,7 +619,7 @@
struct mbuf *m;
struct mbuf **mp;
- if (m0 == 0)
+ if (m0 == NULL)
return;
for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
m = *mp;
@@ -619,13 +647,14 @@
sb->sb_lastrecord = m0;
m = m0->m_next;
- m0->m_next = 0;
+ m0->m_next = NULL;
if (m && (m0->m_flags & M_EOR)) {
m0->m_flags &= ~M_EOR;
m->m_flags |= M_EOR;
}
sbcompress(sb, m, m0);
}
+#endif
/*
* Append address and data, and optionally, control (ancillary) data
@@ -644,6 +673,7 @@
if (m0 && (m0->m_flags & M_PKTHDR) == 0)
panic("sbappendaddr");
+ sbcheck(sb);
if (m0)
space += m0->m_pkthdr.len;
@@ -657,8 +687,9 @@
if (asa->sa_len > MLEN)
return (0);
MGET(m, MB_DONTWAIT, MT_SONAME);
- if (m == 0)
+ if (m == NULL)
return (0);
+ KKASSERT(m->m_nextpkt == NULL);
m->m_len = asa->sa_len;
bcopy(asa, mtod(m, caddr_t), asa->sa_len);
if (n)
@@ -674,6 +705,9 @@
else
sb->sb_lastrecord->m_nextpkt = m;
sb->sb_lastrecord = m;
+ while (m->m_next)
+ m = m->m_next;
+ sb->sb_lastmbuf = m;
return (1);
}
@@ -689,6 +723,8 @@
u_int length, cmbcnt, m0mbcnt;
KASSERT(control != NULL, ("sbappendcontrol"));
+ KKASSERT(control->m_nextpkt == NULL);
+ sbcheck(sb);
length = m_countm(control, &n, &cmbcnt) + m_countm(m0, NULL, &m0mbcnt);
if (length > sbspace(sb))
@@ -701,6 +737,7 @@
else
sb->sb_lastrecord->m_nextpkt = control;
sb->sb_lastrecord = control;
+ sb->sb_lastmbuf = m0;
sb->sb_cc += length;
sb->sb_mbcnt += cmbcnt + m0mbcnt;
@@ -717,7 +754,9 @@
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *tailm)
{
int eor = 0;
+ struct mbuf *free_chain = NULL;
+ sbcheck(sb);
while (m) {
struct mbuf *o;
@@ -726,12 +765,18 @@
* Disregard empty mbufs as long as we don't encounter
* an end-of-record or there is a trailing mbuf of
* the same type to propagate the EOR flag to.
+ *
+ * Defer the m_free() call because it can block and break
+ * the atomicy of the sockbuf.
*/
if (m->m_len == 0 &&
(eor == 0 ||
(((o = m->m_next) || (o = tailm)) &&
o->m_type == m->m_type))) {
- m = m_free(m);
+ o = m->m_next;
+ m->m_next = free_chain;
+ free_chain = m;
+ m = o;
continue;
}
@@ -745,7 +790,10 @@
(unsigned)m->m_len);
tailm->m_len += m->m_len;
sb->sb_cc += m->m_len; /* update sb counter */
- m = m_free(m);
+ o = m->m_next;
+ m->m_next = free_chain;
+ free_chain = m;
+ m = o;
continue;
}
@@ -753,7 +801,8 @@
if (tailm == NULL) {
KASSERT(sb->sb_mb == NULL,
("sbcompress: sb_mb not NULL"));
- sb->sb_mb = m; /* put at front of sockbuf */
+ sb->sb_mb = m; /* only mbuf in sockbuf */
+ sb->sb_lastrecord = m; /* new last record */
} else {
tailm->m_next = m; /* tack m on following tailm */
}
@@ -770,12 +819,23 @@
tailm->m_flags &= ~M_EOR;
}
+ /*
+ * Propogate EOR to the last mbuf
+ */
if (eor) {
if (tailm)
- tailm->m_flags |= eor; /* propagate EOR to last mbuf */
+ tailm->m_flags |= eor;
else
printf("semi-panic: sbcompress");
}
+
+ /*
+ * Clean up any defered frees.
+ */
+ while (free_chain)
+ free_chain = m_free(free_chain);
+
+ sbcheck(sb);
}
/*
@@ -812,8 +872,10 @@
int len;
{
struct mbuf *m;
+ struct mbuf *n;
struct mbuf *nextpkt;
+ sbcheck(sb);
m = sb->sb_mb;
nextpkt = (m != NULL) ? m->m_nextpkt : NULL;
while (len > 0) {
@@ -832,20 +894,16 @@
break;
}
len -= m->m_len;
- sbfree(sb, m);
- m = m_free(m);
+ n = sbunlinkmbuf(sb, m);
+ m_free(m);
+ m = n;
}
while (m && m->m_len == 0) {
- sbfree(sb, m);
- m = m_free(m);
- }
- if (m != NULL) {
- sb->sb_mb = m;
- m->m_nextpkt = nextpkt;
- } else {
- sb->sb_mb = nextpkt;
- sb->sb_lastmbuf = NULL; /* invalidate hint */
+ n = sbunlinkmbuf(sb, m);
+ m_free(m);
+ m = n;
}
+ sbcheck(sb);
}
/*
@@ -857,16 +915,51 @@
struct sockbuf *sb;
{
struct mbuf *m;
+ struct mbuf *n;
+ sbcheck(sb);
m = sb->sb_mb;
if (m) {
- sb->sb_mb = m->m_nextpkt;
+ if ((sb->sb_mb = m->m_nextpkt) == NULL) {
+ sb->sb_lastrecord = NULL;
+ sb->sb_lastmbuf = NULL;
+ }
m->m_nextpkt = NULL;
- do {
- sbfree(sb, m);
- m = m_free(m);
- } while (m);
+ for (n = m; n; n = n->m_next)
+ sbfree(sb, n);
+ m_freem(m);
+ sbcheck(sb);
+ }
+}
+
+/*
+ * Drop the first mbuf off the sockbuf and move the next mbuf to the front.
+ * Currently only the head mbuf of the sockbuf may be dropped this way.
+ */
+struct mbuf *
+sbunlinkmbuf(struct sockbuf *sb, struct mbuf *m)
+{
+ struct mbuf *n;
+
+ KKASSERT(sb->sb_mb == m);
+ sbfree(sb, m);
+ n = m->m_next;
+ m->m_next = NULL;
+ if (n) {
+ sb->sb_mb = n;
+ if (sb->sb_lastrecord == m)
+ sb->sb_lastrecord = n; /* n may be NULL */
+ KKASSERT(sb->sb_lastmbuf != m);
+ n->m_nextpkt = m->m_nextpkt;
+ } else {
+ sb->sb_mb = m->m_nextpkt;
+ if (sb->sb_lastrecord == m)
+ sb->sb_lastrecord = m->m_nextpkt;
+ if (sb->sb_mb == NULL)
+ sb->sb_lastmbuf = NULL;
}
+ m->m_nextpkt = NULL;
+ return(n);
}
/*
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.19
diff -u -r1.19 socketvar.h
--- sys/socketvar.h 13 Jul 2005 01:38:53 -0000 1.19
+++ sys/socketvar.h 14 Jul 2005 17:02:08 -0000
@@ -180,6 +180,12 @@
* Macros for sockets and socket buffering.
*/
+#ifdef SOCKBUF_DEBUG
+#define sbcheck(sb) _sbcheck(sb)
+#else
+#define sbcheck(sb)
+#endif
+
/*
* Do we need to notify the other side when I/O is possible?
*/
@@ -337,12 +343,14 @@
struct mbuf *control);
void sbappendrecord (struct sockbuf *sb, struct mbuf *m0);
void sbappendstream (struct sockbuf *sb, struct mbuf *m);
-void sbcheck (struct sockbuf *sb);
+void _sbcheck (struct sockbuf *sb);
void sbcompress (struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
sbcreatecontrol (caddr_t p, int size, int type, int level);
void sbdrop (struct sockbuf *sb, int len);
void sbdroprecord (struct sockbuf *sb);
+struct mbuf *
+ sbunlinkmbuf (struct sockbuf *sb, struct mbuf *m);
void sbflush (struct sockbuf *sb);
void sbinsertoob (struct sockbuf *sb, struct mbuf *m0);
void sbrelease (struct sockbuf *sb, struct socket *so);
More information about the Bugs
mailing list