ext2fs updates from ffs

Pedro F. Giffuni pfgshield-freebsd at yahoo.com
Fri Sep 11 03:24:59 PDT 2009


Hi again,

Here is an updated patch that brings the ffs changes over to ext2fs:
diff -u ext2fs.orig/ext2_alloc.c ext2fs/ext2_alloc.c
--- ext2fs.orig/ext2_alloc.c	2009-09-10 13:05:31.000000000 +0000
+++ ext2fs/ext2_alloc.c	2009-09-10 13:41:09.000000000 +0000
@@ -441,7 +441,7 @@
 	/* if the next block is actually what we thought it is,
 	   then set the goal to what we thought it should be
 	*/
-	if(ip->i_next_alloc_block == lbn)
+	if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
 		return ip->i_next_alloc_goal;
 
 	/* now check whether we were provided with an array that basically
diff -u ext2fs.orig/ext2_inode.c ext2fs/ext2_inode.c
--- ext2fs.orig/ext2_inode.c	2009-09-10 13:05:31.000000000 +0000
+++ ext2fs/ext2_inode.c	2009-09-10 13:42:21.000000000 +0000
@@ -126,16 +126,11 @@
 	long count, nblocks, blocksreleased = 0;
 	int aflags, error, i, allerror;
 	off_t osize;
-/*
-printf("ext2_truncate called %d to %d\n", VTOI(ovp)->i_number, length);
-*/	/* 
-	 * negative file sizes will totally break the code below and
-	 * are not meaningful anyways.
-	 */
+
+	oip = VTOI(ovp);	 
 	if (length < 0)
-	    return EFBIG;
+		return (EINVAL);
 
-	oip = VTOI(ovp);
 	if (ovp->v_type == VLNK &&
 	    oip->i_size < ovp->v_mount->mnt_maxsymlinklen) {
 #ifdef DIAGNOSTIC
@@ -157,23 +152,29 @@
 	/*
 	 * Lengthen the size of the file. We must ensure that the
 	 * last byte of the file is allocated. Since the smallest
-	 * value of oszie is 0, length will be at least 1.
+	 * value of osize is 0, length will be at least 1.
 	 */
 	if (osize < length) {
 		if (length > oip->i_e2fs->fs_maxfilesize)
 			return (EFBIG);
+		vnode_pager_setsize(ovp, length);
 		offset = blkoff(fs, length - 1);
 		lbn = lblkno(fs, length - 1);
 		aflags = B_CLRBUF;
 		if (flags & IO_SYNC)
 			aflags |= B_SYNC;
-		vnode_pager_setsize(ovp, length);
-		if ((error = ext2_balloc(oip, lbn, offset + 1, cred, &bp,
-		    aflags)) != 0)
+		error = ext2_balloc(oip, lbn, offset + 1, cred, &bp, aflags);
+		if (error) {
+			vnode_pager_setsize(vp, osize);
 			return (error);
+		}
 		oip->i_size = length;
-		if (aflags & IO_SYNC)
+		if (bp->b_bufsize == fs->s_blocksize)
+			bp->b_flags |= B_CLUSTEROK;
+		if (aflags & B_SYNC)
 			bwrite(bp);
+		else if (ovp->v_mount->mnt_flag & MNT_ASYNC)
+			bdwrite(bp);
 		else
 			bawrite(bp);
 		oip->i_flag |= IN_CHANGE | IN_UPDATE;
@@ -195,15 +196,19 @@
 		aflags = B_CLRBUF;
 		if (flags & IO_SYNC)
 			aflags |= B_SYNC;
-		if ((error = ext2_balloc(oip, lbn, offset, cred, &bp,
-		    aflags)) != 0)
+		error = ext2_balloc(oip, lbn, offset, cred, &bp, aflags);
+		if (error)
 			return (error);
 		oip->i_size = length;
 		size = blksize(fs, oip, lbn);
 		bzero((char *)bp->b_data + offset, (u_int)(size - offset));
 		allocbuf(bp, size);
-		if (aflags & IO_SYNC)
+		if (bp->b_bufsize == fs->s_blocksize)
+			bp->b_flags |= B_CLUSTEROK;
+		if (aflags & B_SYNC)
 			bwrite(bp);
+		else if (ovp->v_mount->mnt_flag & MNT_ASYNC)
+			bdwrite(bp);
 		else
 			bawrite(bp);
 	}
@@ -247,6 +252,7 @@
 	error = vtruncbuf(ovp, cred, td, length, (int)fs->s_blocksize);
 	if (error && (allerror == 0))
 		allerror = error;
+	vnode_pager_setsize(ovp, length);
 
 	/*
 	 * Indirect blocks first.
diff -u ext2fs.orig/ext2_readwrite.c ext2fs/ext2_readwrite.c
--- ext2fs.orig/ext2_readwrite.c	2009-09-10 13:05:31.000000000 +0000
+++ ext2fs/ext2_readwrite.c	2009-09-10 14:13:41.000000000 +0000
@@ -36,6 +36,7 @@
  * $FreeBSD: src/sys/gnu/fs/ext2fs/ext2_readwrite.c,v 1.31.20.1 2009/04/15 03:14:26 kensmith Exp $
  */
 
+/* XXX TODO: remove these obfuscations (as in ffs_vnops.c). */
 #define	BLKSIZE(a, b, c)	blksize(a, b, c)
 #define	FS			struct ext2_sb_info
 #define	I_FS			i_e2fs
@@ -47,7 +48,6 @@
 /*
  * Vnode op for reading.
  */
-/* ARGSUSED */
 static int
 READ(ap)
 	struct vop_read_args /* {
@@ -65,8 +65,8 @@
 	daddr_t lbn, nextlbn;
 	off_t bytesinfile;
 	long size, xfersize, blkoffset;
-	int error, orig_resid;
-	int seqcount = ap->a_ioflag >> IO_SEQSHIFT;
+	int error, orig_resid, seqcount;
+	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
 	u_short mode;
 
 	vp = ap->a_vp;
@@ -84,11 +84,14 @@
 	} else if (vp->v_type != VREG && vp->v_type != VDIR)
 		panic("%s: type %d", READ_S, vp->v_type);
 #endif
-	fs = ip->I_FS;
-	if ((uoff_t)uio->uio_offset > fs->fs_maxfilesize)
-		return (EFBIG);
-
 	orig_resid = uio->uio_resid;
+	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
+	if (orig_resid == 0)
+		return (0);
+	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
+	fs = ip->I_FS;
+	if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->fs_maxfilesize)
+		return (EOVERFLOW);
 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
 			break;
@@ -106,9 +109,8 @@
 		if (lblktosize(fs, nextlbn) >= ip->i_size)
 			error = bread(vp, lbn, size, NOCRED, &bp);
 		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
-			error = cluster_read(vp,
-			    ip->i_size, lbn, size, NOCRED,
-			    uio->uio_resid, (ap->a_ioflag >> IO_SEQSHIFT), &bp);
+			error = cluster_read(vp, ip->i_size, lbn, size, NOCRED,
+			    blkoffset + uio->uio_resid, seqcount, &bp);
 		else if (seqcount > 1) {
 			int nextsize = BLKSIZE(fs, ip, nextlbn);
 			error = breadn(vp, lbn,
@@ -134,8 +136,8 @@
 				break;
 			xfersize = size;
 		}
-		error =
-		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+		error = uiomove((char *)bp->b_data + blkoffset,
+			(int)xfersize, uio);
 		if (error)
 			break;
 
@@ -143,7 +145,7 @@
 	}
 	if (bp != NULL)
 		bqrelse(bp);
-	if (orig_resid > 0 && (error == 0 || uio->uio_resid != orig_resid) &&
+	if ((error == 0 || uio->uio_resid != orig_resid) &&
 	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
 		ip->i_flag |= IN_ACCESS;
 	return (error);
@@ -169,11 +171,10 @@
 	struct thread *td;
 	daddr_t lbn;
 	off_t osize;
-	int seqcount;
-	int blkoffset, error, flags, ioflag, resid, size, xfersize;
+	int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;
 
 	ioflag = ap->a_ioflag;
-	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
+	seqcount = ioflag >> IO_SEQSHIFT;
 	uio = ap->a_uio;
 	vp = ap->a_vp;
 	ip = VTOI(vp);
@@ -194,15 +195,15 @@
 		break;
 	case VDIR:
 		if ((ioflag & IO_SYNC) == 0)
-			panic("%s: nonsync dir write", WRITE_S);
+			panic("ext2_write: nonsync dir write");
 		break;
 	default:
-		panic("%s: type", WRITE_S);
+		panic("ext2_write: type %p %d (%jd,%d)", (void *)vp, vp->v_type,
+			(intmax_t)uio->uio_offset, uio->uio_resid);
 	}
 
 	fs = ip->I_FS;
-	if (uio->uio_offset < 0 ||
-	    (uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
+	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
 		return (EFBIG);
 	/*
 	 * Maybe this should be above the vnode op call, but so long as
@@ -236,26 +237,19 @@
 
 		/*
 		 * Avoid a data-consistency race between write() and mmap()
-		 * by ensuring that newly allocated blocks are zerod.  The
+		 * by ensuring that newly allocated blocks are zeroed.  The
 		 * race can occur even in the case where the write covers
 		 * the entire block.
 		 */
 		flags |= B_CLRBUF;
-#if 0
-		if (fs->s_frag_size > xfersize)
-			flags |= B_CLRBUF;
-		else
-			flags &= ~B_CLRBUF;
-#endif
 
-		error = ext2_balloc(ip,
-		    lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
+		error = ext2_balloc(ip, lbn, blkoffset + xfersize,
+		    ap->a_cred, &bp, flags);
 		if (error)
 			break;
 
-		if (uio->uio_offset + xfersize > ip->i_size) {
+		if (uio->uio_offset + xfersize > ip->i_size)
 			ip->i_size = uio->uio_offset + xfersize;
-		}
 
 		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
 		if (size < xfersize)
@@ -264,7 +258,7 @@
 		error =
 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
 		if ((ioflag & IO_VMIO) &&
-		   (LIST_FIRST(&bp->b_dep) == NULL)) /* in ext2fs? */
+		   LIST_FIRST(&bp->b_dep) == NULL) /* in ext2fs? */
 			bp->b_flags |= B_RELBUF;
 
 		if (ioflag & IO_SYNC) {
@@ -282,12 +276,15 @@
 		}
 		if (error || xfersize == 0)
 			break;
-		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 	}
 	/*
 	 * If we successfully wrote any data, and we are not the superuser
 	 * we clear the setuid and setgid bits as a precaution against
 	 * tampering.
+	 * XXX too late, the tamperer may have opened the file while we
+	 * were writing the data (or before).
+	 * XXX too early, if (error && ioflag & IO_UNIT) then we will
+	 * unwrite the data.
 	 */
 	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
 		ip->i_mode &= ~(ISUID | ISGID);
@@ -298,7 +295,11 @@
 			uio->uio_offset -= resid - uio->uio_resid;
 			uio->uio_resid = resid;
 		}
-	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
-		error = ext2_update(vp, 1);
+	}
+	if (uio->uio_resid != resid) {
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		if (ioflag & IO_SYNC)
+			error = ext2_update(vp, 1);
+	}
 	return (error);
 }
diff -u ext2fs.orig/ext2_vfsops.c ext2fs/ext2_vfsops.c
--- ext2fs.orig/ext2_vfsops.c	2009-09-10 13:05:31.000000000 +0000
+++ ext2fs/ext2_vfsops.c	2009-09-10 13:41:09.000000000 +0000
@@ -171,8 +171,6 @@
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
-			if (vfs_busy(mp, LK_NOWAIT, 0, td))
-				return (EBUSY);
 			error = ext2_flushfiles(mp, flags, td);
 			vfs_unbusy(mp, td);
 			if (!error && fs->s_wasvalid) {
@@ -500,6 +498,7 @@
  *	4) invalidate all inactive vnodes.
  *	5) invalidate all cached file data.
  *	6) re-read inode data for all active vnodes.
+ * XXX we are missing some steps, in particular # 3
  */
 static int
 ext2_reload(struct mount *mp, struct thread *td)
@@ -1007,8 +1006,8 @@
 		 * still zero, it will be unlinked and returned to the free
 		 * list by vput().
 		 */
-		vput(vp);
 		brelse(bp);
+		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
@@ -1032,7 +1031,7 @@
 /*
 	ext2_print_inode(ip);
 */
-	brelse(bp);
+	bqrelse(bp);
 
 	/*
 	 * Initialize the vnode from the inode, check for aliases.




More information about the Submit mailing list