nullfs stabilization I

Simon 'corecode' Schubert corecode at fs.ei.tum.de
Mon Jan 9 07:29:20 PST 2006


hey,

I know Matt wants a generic cache coherency layer, but until this is in 
place, I propose attached patch to fix this 
rename-race-and-file-vanishes bug in nullfs.  Furthermore, you can't 
mount a nullfs from a nullfs as this will lead to a recursion stack 
overflow double fault.  This has been fixed as well.

I'm not sure if this single linked list is actually enough, because 
renames on the lower layer won't trigger cache_inval's on the upper 
layer at the moment.

What do people think?

cheers
  simon
--
Serve - BSD     +++  RENT this banner advert  +++    ASCII Ribbon   /"\
Work - Mac      +++  space for low €€€ NOW!1  +++      Campaign     \ /
Party Enjoy Relax   |   http://dragonflybsd.org      Against  HTML   \
Dude 2c 2 the max   !   http://golden-apple.biz       Mail + News   / \
Index: kern/vfs_cache.c
===================================================================
RCS file: /space/cvs/dragonfly/src/sys/kern/vfs_cache.c,v
retrieving revision 1.59
diff -u -r1.59 vfs_cache.c
--- kern/vfs_cache.c	17 Sep 2005 08:29:42 -0000	1.59
+++ kern/vfs_cache.c	7 Jan 2006 16:55:40 -0000
@@ -354,6 +354,10 @@
 	didwarn = 0;
 	td = curthread;
 
+	/* Lock layered cache entries bottom-up */
+	if (ncp->nc_shadowed != NULL)
+		cache_lock(ncp->nc_shadowed);
+
 	for (;;) {
 		if (ncp->nc_exlocks == 0) {
 			ncp->nc_exlocks = 1;
@@ -398,9 +402,18 @@
 cache_lock_nonblock(struct namecache *ncp)
 {
 	thread_t td;
+	int error;
 
 	KKASSERT(ncp->nc_refs != 0);
 	td = curthread;
+
+	/* Lock layered cache entries bottom-up */
+	if (ncp->nc_shadowed != NULL) {
+		error = cache_lock_nonblock(ncp->nc_shadowed);
+		if (error)
+			return(error);
+	}
+
 	if (ncp->nc_exlocks == 0) {
 		ncp->nc_exlocks = 1;
 		ncp->nc_locktd = td;
@@ -415,6 +428,8 @@
 			vhold(ncp->nc_vp);
 		return(0);
 	} else {
+		if (ncp->nc_shadowed != NULL)
+			cache_unlock(ncp->nc_shadowed);
 		return(EWOULDBLOCK);
 	}
 }
@@ -427,6 +442,7 @@
 	KKASSERT(ncp->nc_refs > 0);
 	KKASSERT(ncp->nc_exlocks > 0);
 	KKASSERT(ncp->nc_locktd == td);
+
 	if (--ncp->nc_exlocks == 0) {
 		if (ncp->nc_vp)
 			vdrop(ncp->nc_vp);
@@ -436,6 +452,9 @@
 			wakeup(ncp);
 		}
 	}
+
+	if (ncp->nc_shadowed != NULL)
+		cache_unlock(ncp->nc_shadowed);
 }
 
 /*
@@ -455,7 +474,10 @@
 	/* XXX MP */
 	if (ncp->nc_exlocks == 0 || ncp->nc_locktd == curthread) {
 		_cache_hold(ncp);
-		cache_lock(ncp);
+		if (ncp->nc_exlocks == 0)
+			cache_lock_nonblock(ncp); /* doesn't work recursive */
+		else
+			cache_lock(ncp);
 		return(0);
 	}
 	return(EWOULDBLOCK);
@@ -1199,6 +1221,8 @@
 			ncp->nc_flag &= ~NCF_HASHED;
 			LIST_REMOVE(ncp, nc_hash);
 		}
+		if (ncp->nc_shadowed != NULL)
+			cache_put(ncp->nc_shadowed);
 		if ((par = ncp->nc_parent) != NULL) {
 			par = cache_hold(par);
 			TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
@@ -1383,6 +1407,9 @@
 	LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
 	ncp->nc_flag |= NCF_HASHED;
 	cache_link_parent(ncp, par);
+	if (ncp->nc_mount != NULL &&
+	    ncp->nc_mount->mnt_op->vfs_getshadow != NULL)
+		ncp->nc_mount->mnt_op->vfs_getshadow(par, ncp);
 found:
 	/*
 	 * stats and namecache size management
Index: kern/vfs_default.c
===================================================================
RCS file: /space/cvs/dragonfly/src/sys/kern/vfs_default.c,v
retrieving revision 1.28
diff -u -r1.28 vfs_default.c
--- kern/vfs_default.c	17 Sep 2005 07:43:00 -0000	1.28
+++ kern/vfs_default.c	7 Jan 2006 20:14:09 -0000
@@ -236,6 +236,7 @@
 		VOP_UNLOCK(dvp, 0, curthread);
 	if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
 		/* was resolved by another process while we were unlocked */
+		/* XXX how can this happen?  we never unlock ncp */
 		if (error == 0)
 			vrele(vp);
 	} else if (error == 0) {
@@ -722,7 +723,7 @@
  *			  struct ucred *a_cred,
  *			  int a_flags }
  *
- * Issie a whiteout operation (create, lookup, or delete).  Compatibility 
+ * Issue a whiteout operation (create, lookup, or delete).  Compatibility 
  * requires us to issue the appropriate VOP_OLD_LOOKUP before we issue 
  * VOP_OLD_WHITEOUT in order to setup the directory inode's i_offset and i_count
  * (e.g. in UFS) for the NAMEI_CREATE and NAMEI_DELETE ops.  For NAMEI_LOOKUP
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /space/cvs/dragonfly/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.76
diff -u -r1.76 vfs_syscalls.c
--- kern/vfs_syscalls.c	4 Jan 2006 18:11:26 -0000	1.76
+++ kern/vfs_syscalls.c	7 Jan 2006 17:46:02 -0000
@@ -361,6 +361,8 @@
 		cache_setunresolved(mp->mnt_ncp);
 		mp->mnt_ncp->nc_flag |= NCF_MOUNTPT;
 		mp->mnt_ncp->nc_mount = mp;
+		if (mp->mnt_op->vfs_getshadow != NULL)
+			mp->mnt_op->vfs_getshadow(ncp, mp->mnt_ncp);
 		cache_drop(ncp);
 		/* XXX get the root of the fs and cache_setvp(mnt_ncp...) */
 		vp->v_flag &= ~VMOUNT;
Index: sys/bus.h
===================================================================
RCS file: /space/cvs/dragonfly/src/sys/sys/bus.h,v
retrieving revision 1.19
diff -u -r1.19 bus.h
--- sys/bus.h	28 Nov 2005 17:13:47 -0000	1.19
+++ sys/bus.h	8 Jan 2006 14:23:08 -0000
@@ -30,11 +30,13 @@
 #ifndef _SYS_BUS_H_
 #define _SYS_BUS_H_
 
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
 
 #include <sys/queue.h>
 #include <sys/kobj.h>
+#ifdef _KERNEL
 #include <sys/serialize.h>
+#endif
 
 /*
  * Forward declarations
@@ -96,6 +98,9 @@
 };
 SLIST_HEAD(resource_list, resource_list_entry);
 
+#endif	/* _KERNEL || _KERNEL_STRUCTURES */
+#ifdef _KERNEL
+
 /*
  * Initialise a resource list.
  */
Index: sys/mount.h
===================================================================
RCS file: /space/cvs/dragonfly/src/sys/sys/mount.h,v
retrieving revision 1.21
diff -u -r1.21 mount.h
--- sys/mount.h	26 Jul 2005 15:43:35 -0000	1.21
+++ sys/mount.h	7 Jan 2006 16:03:34 -0000
@@ -394,6 +394,7 @@
 typedef int vfs_uninit_t(struct vfsconf *);
 typedef int vfs_extattrctl_t(struct mount *mp, int cmd,const char *attrname,
 	            caddr_t arg, struct thread *td);
+typedef int vfs_getshadow_t(struct namecache *par, struct namecache *ncp);
 
 struct vfsops {
 	vfs_mount_t 	*vfs_mount;
@@ -410,6 +411,7 @@
 	vfs_init_t  	*vfs_init;
 	vfs_uninit_t 	*vfs_uninit;
 	vfs_extattrctl_t 	*vfs_extattrctl;
+	vfs_getshadow_t	*vfs_getshadow;
 };
 
 #define VFS_MOUNT(MP, PATH, DATA, P) \
Index: sys/namecache.h
===================================================================
RCS file: /space/cvs/dragonfly/src/sys/sys/namecache.h,v
retrieving revision 1.22
diff -u -r1.22 namecache.h
--- sys/namecache.h	17 Sep 2005 07:43:01 -0000	1.22
+++ sys/namecache.h	6 Jan 2006 23:53:30 -0000
@@ -102,6 +102,7 @@
     TAILQ_ENTRY(namecache) nc_vnode;	/* scan via vnode->v_namecache */
     struct namecache_list  nc_list;	/* list of children */
     struct namecache *nc_parent;	/* namecache entry for parent */
+    struct namecache *nc_shadowed;	/* lower layer entry in layered fs */
     struct	vnode *nc_vp;		/* vnode representing name or NULL */
     int		nc_refs;		/* ref count prevents deletion */
     u_short	nc_flag;
Index: vfs/nullfs/null.h
===================================================================
RCS file: /space/cvs/dragonfly/src/sys/vfs/nullfs/null.h,v
retrieving revision 1.7
diff -u -r1.7 null.h
--- vfs/nullfs/null.h	4 Jan 2006 03:09:53 -0000	1.7
+++ vfs/nullfs/null.h	7 Jan 2006 17:32:22 -0000
@@ -46,6 +46,7 @@
 struct null_mount {
 	struct mount	*nullm_vfs;
 	struct vnode	*nullm_rootvp;	/* Reference to root null_node */
+	struct namecache *nullm_rootncp; /* Reference to rootvp's ncp */
 };
 
 #ifdef _KERNEL
Index: vfs/nullfs/null_vfsops.c
===================================================================
RCS file: /space/cvs/dragonfly/src/sys/vfs/nullfs/null_vfsops.c,v
retrieving revision 1.19
diff -u -r1.19 null_vfsops.c
--- vfs/nullfs/null_vfsops.c	4 Jan 2006 03:09:53 -0000	1.19
+++ vfs/nullfs/null_vfsops.c	7 Jan 2006 21:01:09 -0000
@@ -98,20 +98,22 @@
 	 * Get argument
 	 */
 	error = copyin(data, (caddr_t)&args, sizeof(struct null_args));
-	if (error)
-		return (error);
 
 	/*
-	 * Find lower node
+	 * Find lower ncp and node
 	 */
 	rootvp = NULL;
-	error = nlookup_init(&nd, args.target, UIO_USERSPACE, NLC_FOLLOW);
+	if (error == 0)
+		error = nlookup_init(&nd, args.target, UIO_USERSPACE,
+				     NLC_FOLLOW);
 	if (error == 0)
 		error = nlookup(&nd);
 	if (error == 0) {
 		error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, 
 					&rootvp);
 	}
+	if (error)
+		return(error);
 
 	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
 				M_NULLFSMNT, M_WAITOK);	/* XXX */
@@ -124,6 +126,9 @@
 	 * -- via the vnode -- is not good enough anymore...
 	 */
 	xmp->nullm_vfs = nd.nl_ncp->nc_mount;
+	xmp->nullm_rootncp = nd.nl_ncp;
+	cache_unlock(nd.nl_ncp);
+	nd.nl_ncp = NULL;
 	nlookup_done(&nd);
 
 	vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops, 
@@ -161,7 +166,7 @@
 static int
 nullfs_unmount(struct mount *mp, int mntflags, struct thread *td)
 {
-	void *mntdata;
+	struct null_mount *mntdata;
 	int flags = 0;
 
 	NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
@@ -172,7 +177,9 @@
 	/*
 	 * Finally, throw away the null_mount structure
 	 */
-	mntdata = mp->mnt_data;
+	mntdata = MOUNTTONULLMOUNT(mp);
+	vrele(mntdata->nullm_rootvp);
+	cache_drop(mntdata->nullm_rootncp);
 	mp->mnt_data = 0;
 	free(mntdata, M_NULLFSMNT);
 	return 0;
@@ -261,6 +268,33 @@
 	    arg, td);
 }
 
+static int
+nullfs_getshadow(struct namecache *par, struct namecache *ncp)
+{
+	struct nlcomponent nlc;
+
+	KKASSERT(ncp->nc_shadowed == NULL);
+	KKASSERT(ncp->nc_exlocks > 0);
+
+	/* If nc_nlen == 0, we're looking up the mountpoint ncp */
+	if (ncp->nc_nlen == 0) {
+		ncp->nc_shadowed =
+		    MOUNTTONULLMOUNT(ncp->nc_mount)->nullm_rootncp;
+		cache_get(ncp->nc_shadowed);
+	} else {
+		KKASSERT(par->nc_shadowed != NULL);
+
+		nlc.nlc_nameptr = ncp->nc_name;
+		nlc.nlc_namelen = ncp->nc_nlen;
+		ncp->nc_shadowed = cache_nlookup(par->nc_shadowed, &nlc);
+	}
+
+	KKASSERT(ncp->nc_exlocks > 0);
+	KKASSERT(ncp->nc_shadowed->nc_exlocks > 0);
+
+	return(0);
+}
+
 
 static struct vfsops null_vfsops = {
 	.vfs_mount =   	 	nullfs_mount,
@@ -270,7 +304,8 @@
 	.vfs_statfs =    	nullfs_statfs,
 	.vfs_sync =     	vfs_stdsync,
 	.vfs_checkexp =  	nullfs_checkexp,
-	.vfs_extattrctl =  	nullfs_extattrctl
+	.vfs_extattrctl =  	nullfs_extattrctl,
+	.vfs_getshadow =	nullfs_getshadow,
 };
 
 VFS_SET(null_vfsops, null, VFCF_LOOPBACK);
Index: vfs/nullfs/null_vnops.c
===================================================================
RCS file: /space/cvs/dragonfly/src/sys/vfs/nullfs/null_vnops.c,v
retrieving revision 1.24
diff -u -r1.24 null_vnops.c
--- vfs/nullfs/null_vnops.c	4 Jan 2006 03:09:53 -0000	1.24
+++ vfs/nullfs/null_vnops.c	7 Jan 2006 21:17:39 -0000
@@ -125,87 +125,203 @@
 static int
 null_nresolve(struct vop_nresolve_args *ap)
 {
-	ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+	struct namecache *ncp, *lowerncp;
 
-	return vop_nresolve_ap(ap);
+	KKASSERT(ap->a_ncp->nc_exlocks > 0);
+	KKASSERT(ap->a_ncp->nc_shadowed->nc_exlocks > 0);
+
+	ncp = ap->a_ncp;
+	lowerncp = ncp->nc_shadowed;
+	ncp->nc_error = cache_resolve(lowerncp, ap->a_cred);
+	if (ncp->nc_error == 0) {
+		cache_setvp(ncp, lowerncp->nc_vp);
+	} else if (ncp->nc_error == ENOENT &&
+	    (lowerncp->nc_flag & NCF_WHITEOUT) != 0) {
+		cache_setvp(ncp, NULL);
+		ncp->nc_flag |= NCF_WHITEOUT;
+	}
+
+	KKASSERT(ap->a_ncp->nc_exlocks > 0);
+	KKASSERT(ap->a_ncp->nc_shadowed->nc_exlocks > 0);
+
+	return(ncp->nc_error);
 }
 
 static int
 null_ncreate(struct vop_ncreate_args *ap)
 {
-	ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+	int error;
+	struct namecache *ncp, *lowerncp;
 
-	return vop_ncreate_ap(ap);
+	ncp = ap->a_ncp;
+	ap->a_ncp = lowerncp = ncp->nc_shadowed;
+	ap->a_head.a_ops = ap->a_ncp->nc_mount->mnt_vn_norm_ops;
+
+	error = vop_ncreate_ap(ap);
+	if (error == 0) {
+		cache_setunresolved(ncp);
+		cache_setvp(ncp, lowerncp->nc_vp);
+	}
+	ap->a_ncp = ncp;
+	return(error);
 }
 
 static int
 null_nmkdir(struct vop_nmkdir_args *ap)
 {
-	ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+	int error;
+	struct namecache *ncp, *lowerncp;
 
-	return vop_nmkdir_ap(ap);
+	ncp = ap->a_ncp;
+	ap->a_ncp = lowerncp = ncp->nc_shadowed;
+	ap->a_head.a_ops = ap->a_ncp->nc_mount->mnt_vn_norm_ops;
+
+	error = vop_nmkdir_ap(ap);
+	if (error == 0) {
+		cache_setunresolved(ncp);
+		cache_setvp(ncp, lowerncp->nc_vp);
+	}
+	ap->a_ncp = ncp;
+	return(error);
 }
 
 static int
 null_nmknod(struct vop_nmknod_args *ap)
 {
-	ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+	int error;
+	struct namecache *ncp, *lowerncp;
 
-	return vop_nmknod_ap(ap);
+	ncp = ap->a_ncp;
+	ap->a_ncp = lowerncp = ncp->nc_shadowed;
+	ap->a_head.a_ops = ap->a_ncp->nc_mount->mnt_vn_norm_ops;
+
+	error = vop_nmknod_ap(ap);
+	if (error == 0) {
+		cache_setunresolved(ncp);
+		cache_setvp(ncp, lowerncp->nc_vp);
+	}
+	ap->a_ncp = ncp;
+	return(error);
 }
 
 static int
 null_nlink(struct vop_nlink_args *ap)
 {
-	ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+	int error;
+	struct namecache *ncp, *lowerncp;
 
-	return vop_nlink_ap(ap);
+	ncp = ap->a_ncp;
+	ap->a_ncp = lowerncp = ncp->nc_shadowed;
+	ap->a_head.a_ops = ap->a_ncp->nc_mount->mnt_vn_norm_ops;
+
+	error = vop_nlink_ap(ap);
+	if (error == 0) {
+		cache_setunresolved(ncp);
+		cache_setvp(ncp, lowerncp->nc_vp);
+	}
+	ap->a_ncp = ncp;
+	return(error);
 }
 
 static int
 null_nsymlink(struct vop_nsymlink_args *ap)
 {
-	ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+	int error;
+	struct namecache *ncp, *lowerncp;
 
-	return vop_nsymlink_ap(ap);
+	ncp = ap->a_ncp;
+	ap->a_ncp = lowerncp = ncp->nc_shadowed;
+	ap->a_head.a_ops = ap->a_ncp->nc_mount->mnt_vn_norm_ops;
+
+	error = vop_nsymlink_ap(ap);
+	if (error == 0) {
+		cache_setunresolved(ncp);
+		cache_setvp(ncp, lowerncp->nc_vp);
+	}
+	ap->a_ncp = ncp;
+	return(error);
 }
 
 static int
 null_nwhiteout(struct vop_nwhiteout_args *ap)
 {
-	ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+	int error;
+	struct namecache *ncp, *lowerncp;
 
-	return vop_nwhiteout_ap(ap);
+	ncp = ap->a_ncp;
+	ap->a_ncp = lowerncp = ncp->nc_shadowed;
+	ap->a_head.a_ops = ap->a_ncp->nc_mount->mnt_vn_norm_ops;
+
+	error = vop_nwhiteout_ap(ap);
+	if (error == 0 &&
+	    (ap->a_flags == NAMEI_DELETE || ap->a_flags == NAMEI_CREATE)) {
+		cache_setunresolved(ncp);
+	}
+	ap->a_ncp = ncp;
+	return(error);
 }
 
 static int
 null_nremove(struct vop_nremove_args *ap)
 {
-	ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+	int error;
+	struct namecache *ncp, *lowerncp;
 
-	return vop_nremove_ap(ap);
+	ncp = ap->a_ncp;
+	ap->a_ncp = lowerncp = ncp->nc_shadowed;
+	ap->a_head.a_ops = ap->a_ncp->nc_mount->mnt_vn_norm_ops;
+
+	error = vop_nremove_ap(ap);
+	if (error == 0) {
+		cache_setunresolved(ncp);
+		cache_setvp(ncp, lowerncp->nc_vp);
+	}
+	ap->a_ncp = ncp;
+	return(error);
 }
 
 static int
 null_nrmdir(struct vop_nrmdir_args *ap)
 {
-	ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+	int error;
+	struct namecache *ncp, *lowerncp;
 
-	return vop_nrmdir_ap(ap);
+	ncp = ap->a_ncp;
+	ap->a_ncp = lowerncp = ncp->nc_shadowed;
+	ap->a_head.a_ops = ap->a_ncp->nc_mount->mnt_vn_norm_ops;
+
+	error = vop_nrmdir_ap(ap);
+	if (error == 0)
+		cache_inval(ncp, CINV_DESTROY);
+	ap->a_ncp = ncp;
+	return(error);
 }
 
 static int
 null_nrename(struct vop_nrename_args *ap)
 {
-	struct mount *lmp;
-
-	lmp = MOUNTTONULLMOUNT(ap->a_fncp->nc_mount)->nullm_vfs;
-	if (lmp != MOUNTTONULLMOUNT(ap->a_tncp->nc_mount)->nullm_vfs)
-		return (EINVAL);
-
-	ap->a_head.a_ops = lmp->mnt_vn_norm_ops;
-
-	return vop_nrename_ap(ap);
+	int error;
+	struct namecache *fncp;
+	struct namecache *tncp, *lowertncp;
+
+	if (ap->a_fncp->nc_mount != ap->a_tncp->nc_mount)
+		return(EINVAL);
+
+	fncp = ap->a_fncp;
+	ap->a_fncp = fncp->nc_shadowed;
+	tncp = ap->a_tncp;
+	ap->a_tncp = lowertncp = tncp->nc_shadowed;
+
+	ap->a_head.a_ops = ap->a_fncp->nc_mount->mnt_vn_norm_ops;
+
+	error = vop_nrename_ap(ap);
+	if (error == 0) {
+		cache_rename(fncp, tncp);
+		cache_setvp(tncp, lowertncp->nc_vp);
+	}
+	ap->a_fncp = fncp;
+	ap->a_tncp = tncp;
+	return(error);
 }
 
 /*




More information about the Kernel mailing list