ccd on NATA problem - ata0: FAILURE - oversized DMA transfer attempt 73728 > 65536
Matthew Dillon
dillon at apollo.backplane.com
Wed Jul 18 21:24:04 PDT 2007
Please test this patch. It survived a couple of buildworld runs
on my test box with the chunk size forced to 4K, but it does mess
with low-level disk I/O, so make sure anything important is backed up
first.
-Matt
Index: kern/subr_diskgpt.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_diskgpt.c,v
retrieving revision 1.3
diff -u -p -r1.3 subr_diskgpt.c
--- kern/subr_diskgpt.c 19 Jun 2007 06:07:57 -0000 1.3
+++ kern/subr_diskgpt.c 19 Jul 2007 02:42:50 -0000
@@ -136,6 +136,9 @@ error = EINVAL;
goto done;
}
+ /*
+ * XXX subject to device dma size limitations
+ */
bp2 = geteblk((int)(table_blocks * info->d_media_blksize));
bp2->b_bio1.bio_offset = (off_t)table_lba * info->d_media_blksize;
bp2->b_bcount = table_blocks * info->d_media_blksize;
Index: kern/subr_disklabel64.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_disklabel64.c,v
retrieving revision 1.4
diff -u -p -r1.4 subr_disklabel64.c
--- kern/subr_disklabel64.c 19 Jun 2007 06:39:06 -0000 1.4
+++ kern/subr_disklabel64.c 19 Jul 2007 02:43:56 -0000
@@ -118,6 +118,9 @@ size_t dlpcrcsize;
size_t bpsize;
int secsize;
+ /*
+ * XXX I/O size is subject to device DMA limitations
+ */
secsize = info->d_media_blksize;
bpsize = (sizeof(*dlp) + secsize - 1) & ~(secsize - 1);
@@ -289,6 +292,9 @@ int secsize;
lp = lpx.lab64;
+ /*
+ * XXX I/O size is subject to device DMA limitations
+ */
secsize = ssp->dss_secsize;
bpsize = (sizeof(*lp) + secsize - 1) & ~(secsize - 1);
Index: kern/vfs_aio.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_aio.c,v
retrieving revision 1.41
diff -u -p -r1.41 vfs_aio.c
--- kern/vfs_aio.c 29 Jun 2007 21:54:08 -0000 1.41
+++ kern/vfs_aio.c 19 Jul 2007 02:49:35 -0000
@@ -944,8 +944,12 @@ bp->b_error = 0;
crit_exit();
- /* Perform transfer. */
- dev_dstrategy(vp->v_rdev, &bp->b_bio1);
+ /*
+ * Perform the transfer. vn_strategy must be used even though we
+ * know we have a device in order to deal with requests which exceed
+ * device DMA limitations.
+ */
+ vn_strategy(vp, &bp->b_bio1);
notify = 0;
crit_enter();
Index: vfs/specfs/spec_vnops.c
===================================================================
RCS file: /cvs/src/sys/vfs/specfs/spec_vnops.c,v
retrieving revision 1.51
diff -u -p -r1.51 spec_vnops.c
--- vfs/specfs/spec_vnops.c 9 May 2007 00:53:36 -0000 1.51
+++ vfs/specfs/spec_vnops.c 19 Jul 2007 02:39:25 -0000
@@ -60,6 +60,15 @@ #include <sys/buf2.h>
#include <sys/thread2.h>
+/*
+ * Specfs chained debugging (bitmask)
+ *
+ * 0 - disable debugging
+ * 1 - report chained I/Os
+ * 2 - force 4K chained I/Os
+ */
+#define SPEC_CHAIN_DEBUG 0
+
static int spec_advlock (struct vop_advlock_args *);
static int spec_bmap (struct vop_bmap_args *);
static int spec_close (struct vop_close_args *);
@@ -75,6 +84,7 @@
static int spec_read (struct vop_read_args *);
static int spec_strategy (struct vop_strategy_args *);
static int spec_write (struct vop_write_args *);
+static void spec_strategy_done(struct bio *nbio);
struct vop_ops spec_vnode_vops = {
.vop_default = vop_defaultop,
@@ -438,7 +448,9 @@ return (0);
}
/*
- * Just call the device strategy routine
+ * Convert a vnode strategy call into a device strategy call. Vnode strategy
+ * calls are not limited to device DMA limits so we have to deal with the
+ * case.
*
* spec_strategy(struct vnode *a_vp, struct bio *a_bio)
*/
@@ -447,8 +459,11 @@ */
{
struct bio *bio = ap->a_bio;
struct buf *bp = bio->bio_buf;
+ struct buf *nbp;
struct vnode *vp;
struct mount *mp;
+ int chunksize;
+ int maxiosize;
if (bp->b_cmd != BUF_CMD_READ &&
(LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) {
@@ -474,11 +489,151 @@ else
mp->mnt_stat.f_syncwrites++;
}
}
- dev_dstrategy_chain(vp->v_rdev, bio);
+
+ /*
+ * Device iosize limitations only apply to read and write. Shortcut
+ * the I/O if it fits.
+ */
+ maxiosize = vp->v_rdev->si_iosize_max;
+#if SPEC_CHAIN_DEBUG & 2
+ maxiosize = 4096;
+#endif
+ if (bp->b_bcount <= maxiosize ||
+ (bp->b_cmd != BUF_CMD_READ && bp->b_cmd != BUF_CMD_WRITE)) {
+ dev_dstrategy_chain(vp->v_rdev, bio);
+ return (0);
+ }
+
+ /*
+ * Clone the buffer and set up an I/O chain to chunk up the I/O.
+ */
+ nbp = kmalloc(sizeof(*bp), M_DEVBUF, M_INTWAIT|M_ZERO);
+ initbufbio(nbp);
+ LIST_INIT(&nbp->b_dep);
+ BUF_LOCKINIT(nbp);
+ BUF_LOCK(nbp, LK_EXCLUSIVE);
+ BUF_KERNPROC(nbp);
+ nbp->b_vp = vp;
+ nbp->b_flags = B_PAGING | (bp->b_flags & B_BNOCLIP);
+ nbp->b_data = bp->b_data;
+ nbp->b_bio1.bio_done = spec_strategy_done;
+ nbp->b_bio1.bio_offset = bio->bio_offset;
+ nbp->b_bio1.bio_caller_info1.ptr = bio;
+
+ /*
+ * Start the first transfer
+ */
+ if (vn_isdisk(vp, NULL))
+ chunksize = vp->v_rdev->si_bsize_phys;
+ else
+ chunksize = DEV_BSIZE;
+ chunksize = maxiosize / chunksize * chunksize;
+#if SPEC_CHAIN_DEBUG & 1
+ kprintf("spec_strategy chained I/O chunksize=%d\n", chunksize);
+#endif
+ nbp->b_cmd = bp->b_cmd;
+ nbp->b_bcount = chunksize;
+ nbp->b_bufsize = chunksize; /* used to detect a short I/O */
+ nbp->b_bio1.bio_caller_info2.index = chunksize;
+
+#if SPEC_CHAIN_DEBUG & 1
+ kprintf("spec_strategy: chain %p offset %d/%d bcount %d\n",
+ bp, 0, bp->b_bcount, nbp->b_bcount);
+#endif
+
+ dev_dstrategy(vp->v_rdev, &nbp->b_bio1);
return (0);
}
/*
+ * Chunked up transfer completion routine - chain transfers until done
+ */
+static
+void
+spec_strategy_done(struct bio *nbio)
+{
+ struct buf *nbp = nbio->bio_buf;
+ struct bio *bio = nbio->bio_caller_info1.ptr; /* original bio */
+ struct buf *bp = bio->bio_buf; /* original bp */
+ int chunksize = nbio->bio_caller_info2.index; /* chunking */
+ int boffset = nbp->b_data - bp->b_data;
+
+ if (nbp->b_flags & B_ERROR) {
+ /*
+ * An error terminates the chain, propagate the error back
+ * to the original bp
+ */
+ bp->b_flags |= B_ERROR;
+ bp->b_error = nbp->b_error;
+ bp->b_resid = bp->b_bcount - boffset +
+ (nbp->b_bcount - nbp->b_resid);
+#if SPEC_CHAIN_DEBUG & 1
+ kprintf("spec_strategy: chain %p error %d bcount %d/%d\n",
+ bp, bp->b_error, bp->b_bcount,
+ bp->b_bcount - bp->b_resid);
+#endif
+ kfree(nbp, M_DEVBUF);
+ biodone(bio);
+ } else if (nbp->b_resid) {
+ /*
+ * A short read or write terminates the chain
+ */
+ bp->b_error = nbp->b_error;
+ bp->b_resid = bp->b_bcount - boffset +
+ (nbp->b_bcount - nbp->b_resid);
+#if SPEC_CHAIN_DEBUG & 1
+ kprintf("spec_strategy: chain %p short read(1) bcount %d/%d\n",
+ bp, bp->b_bcount - bp->b_resid, bp->b_bcount);
+#endif
+ kfree(nbp, M_DEVBUF);
+ biodone(bio);
+ } else if (nbp->b_bcount != nbp->b_bufsize) {
+ /*
+ * A short read or write can also occur by truncating b_bcount
+ */
+#if SPEC_CHAIN_DEBUG & 1
+ kprintf("spec_strategy: chain %p short read(2) bcount %d/%d\n",
+ bp, nbp->b_bcount + boffset, bp->b_bcount);
+#endif
+ bp->b_error = 0;
+ bp->b_bcount = nbp->b_bcount + boffset;
+ bp->b_resid = nbp->b_resid;
+ kfree(nbp, M_DEVBUF);
+ biodone(bio);
+ } else if (nbp->b_bcount + boffset == bp->b_bcount) {
+ /*
+ * No more data terminates the chain
+ */
+#if SPEC_CHAIN_DEBUG & 1
+ kprintf("spec_strategy: chain %p finished bcount %d\n",
+ bp, bp->b_bcount);
+#endif
+ bp->b_error = 0;
+ bp->b_resid = 0;
+ kfree(nbp, M_DEVBUF);
+ biodone(bio);
+ } else {
+ /*
+ * Continue the chain
+ */
+ boffset += nbp->b_bcount;
+ nbp->b_data = bp->b_data + boffset;
+ nbp->b_bcount = bp->b_bcount - boffset;
+ if (nbp->b_bcount > chunksize)
+ nbp->b_bcount = chunksize;
+ nbp->b_bio1.bio_done = spec_strategy_done;
+ nbp->b_bio1.bio_offset = bio->bio_offset + boffset;
+
+#if SPEC_CHAIN_DEBUG & 1
+ kprintf("spec_strategy: chain %p offset %d/%d bcount %d\n",
+ bp, boffset, bp->b_bcount, nbp->b_bcount);
+#endif
+
+ dev_dstrategy(nbp->b_vp->v_rdev, &nbp->b_bio1);
+ }
+}
+
+/*
* spec_freeblks(struct vnode *a_vp, daddr_t a_addr, daddr_t a_length)
*/
static int
Index: vm/vm_swap.c
===================================================================
RCS file: /cvs/src/sys/vm/vm_swap.c,v
retrieving revision 1.35
diff -u -p -r1.35 vm_swap.c
--- vm/vm_swap.c 15 May 2007 22:44:21 -0000 1.35
+++ vm/vm_swap.c 19 Jul 2007 02:47:43 -0000
@@ -142,8 +142,9 @@ * Issue a strategy call on the appropr
* bp->b_vp is not modified. Strategy code is always supposed to
* use the passed vp.
*
- * XXX do a dev_dstrategy() call on sp->sw_device instead of on
- * sp->sw_vp ?
+ * We have to use vn_strategy() here even if we know we have a
+ * device in order to properly break up requests which exceed the
+ * device's DMA limits.
*/
vn_strategy(sp->sw_vp, nbio);
return 0;
More information about the Bugs
mailing list