a take on cache coherency [patch, rfc]
Csaba Henk
csaba.henk at creo.hu
Mon Jan 23 02:44:40 PST 2006
Hi,
For explanation/discussion, please see the related post in kernel@.
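Before the patch, a minimal sketch of its central idea may help: namecache
entries belonging to the different layers of a stacked filesystem are chained
together through the new nc_shadowed pointer into a "shadow group", and the
shared state (vnode, resolution flags, the lock) is kept in the head of that
chain. The stripped-down types and names below (entry, grphead, shadow_attach)
are invented for this sketch only; the real code operates on struct namecache
and additionally manages references and locking.

/*
 * Sketch: entries chained via a "shadowed" pointer form a shadow
 * group; the head of the chain holds the authoritative state.
 */
#include <assert.h>
#include <stddef.h>

struct entry {
	struct entry *shadowed;		/* next (lower) entry in the group */
	void *vp;			/* resolution state; valid in the head */
};

/* Walk to the group head: the entry that shadows nobody. */
static struct entry *
grphead(struct entry *e)
{
	while (e->shadowed != NULL)
		e = e->shadowed;
	return (e);
}

/* Join "upper" into lower's group, refusing to create a cycle. */
static int
shadow_attach(struct entry *upper, struct entry *lower)
{
	if (grphead(lower) == upper)
		return (-1);		/* would yield a loop, cf. ELOOP */
	upper->shadowed = lower;
	return (0);
}

int
main(void)
{
	int vnode;				/* stand-in for a vnode */
	struct entry lower = { NULL, &vnode };	/* lower fs entry */
	struct entry upper = { NULL, NULL };	/* nullfs alias of it */
	int error;

	error = shadow_attach(&upper, &lower);
	assert(error == 0);
	/* Both layers now resolve to the same state. */
	assert(grphead(&upper)->vp == &vnode);
	/* The reverse attachment would form a loop and is refused. */
	error = shadow_attach(&lower, &upper);
	assert(error == -1);
	return (0);
}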
The locking discipline over such a group is the other half of the story; a
sketch of it follows. The patch itself runs from the signature to the bottom
of this message.
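Again just an illustration, with a pthread mutex standing in for the
namecache's hand-rolled exclusive lock: locking any member of a shadow group
means locking the group head, reached by a walk in which each per-entry lock
protects only the step to the next entry; this mirrors what
cache_lock()/cache_lock_one()/cache_unlock_one() do in the patch.

/*
 * Sketch: locking a shadow group member locks the group head.  Each
 * per-entry lock is held just long enough to read the successor
 * pointer; only the head's lock is kept and handed to the caller.
 */
#include <pthread.h>
#include <stddef.h>

struct entry {
	struct entry *shadowed;		/* next (lower) entry in the group */
	pthread_mutex_t lk;		/* per-entry exclusive lock */
};

static struct entry *
group_lock(struct entry *e)
{
	struct entry *next;

	for (;;) {
		pthread_mutex_lock(&e->lk);
		/* The transition is only inspected under e's lock. */
		next = e->shadowed;
		if (next == NULL)
			return (e);	/* the head, returned locked */
		pthread_mutex_unlock(&e->lk);
		e = next;
	}
}

int
main(void)
{
	struct entry lower = { NULL, PTHREAD_MUTEX_INITIALIZER };
	struct entry upper = { &lower, PTHREAD_MUTEX_INITIALIZER };

	/* Locking the alias really takes the head's lock. */
	pthread_mutex_unlock(&group_lock(&upper)->lk);
	return (0);
}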
Csaba
# HG changeset patch
# User csaba@
# Node ID 6db92777ea99902079b1f31fe330b0a5aac96d88
# Parent 820a1f1d791e95af17294ccd87e5f76dbc1be68c
imported patch cachecoh
diff -r 820a1f1d791e -r 6db92777ea99 sys/emulation/linux/linux_stats.c
--- a/sys/emulation/linux/linux_stats.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/emulation/linux/linux_stats.c Mon Jan 23 05:16:56 2006 +0100
@@ -250,8 +250,8 @@ linux_statfs(struct linux_statfs_args *a
if (error == 0)
error = kern_statfs(&nd, &statfs);
if (error == 0) {
- if (nd.nl_ncp->nc_vp != NULL)
- error = vn_get_namelen(nd.nl_ncp->nc_vp, &namelen);
+ if (cache_grphead_l(nd.nl_ncp)->nc_vp != NULL)
+ error = vn_get_namelen(cache_grphead_l(nd.nl_ncp)->nc_vp, &namelen);
else
error = EINVAL;
}
diff -r 820a1f1d791e -r 6db92777ea99 sys/emulation/svr4/svr4_misc.c
--- a/sys/emulation/svr4/svr4_misc.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/emulation/svr4/svr4_misc.c Mon Jan 23 05:16:56 2006 +0100
@@ -1395,8 +1395,8 @@ svr4_sys_statvfs(struct svr4_sys_statvfs
if (error == 0)
error = kern_statfs(&nd, &bfs);
if (error == 0) {
- if (nd.nl_ncp->nc_vp != NULL)
- error = vn_get_namelen(nd.nl_ncp->nc_vp, &namelen);
+ if (cache_grphead_l(nd.nl_ncp)->nc_vp != NULL)
+ error = vn_get_namelen(cache_grphead_l(nd.nl_ncp)->nc_vp, &namelen);
else
error = EINVAL;
}
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/uipc_usrreq.c
--- a/sys/kern/uipc_usrreq.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/uipc_usrreq.c Mon Jan 23 05:16:56 2006 +0100
@@ -600,7 +600,7 @@ unp_bind(struct unpcb *unp, struct socka
error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_LOCKVP|NLC_CREATE);
if (error == 0)
error = nlookup(&nd);
- if (error == 0 && nd.nl_ncp->nc_vp != NULL)
+ if (error == 0 && cache_grphead_l(nd.nl_ncp)->nc_vp != NULL)
error = EADDRINUSE;
if (error)
goto done;
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_cache.c Mon Jan 23 05:16:56 2006 +0100
@@ -198,6 +198,25 @@ SYSCTL_PROC(_vfs_cache, OID_AUTO, nchsta
static void cache_zap(struct namecache *ncp);
+static __inline
+struct namecache *
+_cache_grphead_l(struct namecache *ncp)
+{
+#ifdef INVARIANTS
+ struct namecache *startncp = ncp;
+#endif
+
+ while (ncp->nc_shadowed) {
+ ncp = ncp->nc_shadowed;
+ KKASSERT(startncp != ncp);
+ KKASSERT(ncp->nc_refs > 0);
+ }
+
+ KKASSERT(ncp->nc_exlocks > 0);
+ KKASSERT(ncp->nc_locktd == curthread);
+ return(ncp);
+}
+
/*
* cache_hold() and cache_drop() prevent the premature deletion of a
* namecache entry but do not prevent operations (such as zapping) on
@@ -222,15 +241,60 @@ _cache_drop(struct namecache *ncp)
{
KKASSERT(ncp->nc_refs > 0);
if (ncp->nc_refs == 1 &&
- (ncp->nc_flag & NCF_UNRESOLVED) &&
+ (ncp->nc_flag & NCF_UNRESOLVED || ncp->nc_shadowed) &&
TAILQ_EMPTY(&ncp->nc_list)
) {
- KKASSERT(ncp->nc_exlocks == 0);
cache_lock(ncp);
- cache_zap(ncp);
- } else {
+ KKASSERT(_cache_grphead_l(ncp)->nc_exlocks == 1);
+ if (_cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
+ cache_zap(ncp);
+ return;
+ }
+ cache_unlock(ncp);
+ }
+ --ncp->nc_refs;
+}
+
+static __inline
+struct namecache *
+_cache_grphead(struct namecache *ncp)
+{
+ if (ncp->nc_shadowed) {
+ /*
+ * We need a ref to please the locking routine, but
+ * we get rid of it again ASAP. Doing so directly saves
+ * us a lot of headache (and some CPU cycles).
+ */
+ cache_get(ncp);
--ncp->nc_refs;
- }
+ ncp = _cache_grphead_l(ncp);
+ cache_unlock(ncp);
+ }
+
+ return(ncp);
+}
+
+/*
+ * Get the head of the shadow group when it's known to be locked.
+ *
+ * (XXX We need this routine in the API only if recursive locking
+ * is considered bad. The additional costs of the general grphead
+ * routine seem to be negligible, especially for "normal"
+ * (nc_shadowless) namecache entries.)
+ */
+struct namecache *
+cache_grphead_l(struct namecache *ncp)
+{
+ return(_cache_grphead_l(ncp));
+}
+
+/*
+ * Get the head of the shadow group.
+ */
+struct namecache *
+cache_grphead(struct namecache *ncp)
+{
+ return(_cache_grphead(ncp));
}
/*
@@ -324,6 +388,9 @@ cache_drop(struct namecache *ncp)
_cache_drop(ncp);
}
+static void cache_lock_one(struct namecache *ncp);
+static void cache_unlock_one(struct namecache *ncp);
+
/*
* Namespace locking. The caller must already hold a reference to the
* namecache structure in order to lock/unlock it. This function prevents
@@ -346,6 +413,29 @@ cache_drop(struct namecache *ncp)
*/
void
cache_lock(struct namecache *ncp)
+{
+ struct namecache *oncp;
+#ifdef INVARIANTS
+ struct namecache *startncp = ncp;
+#endif
+
+ for (;;) {
+ cache_lock_one(ncp);
+ oncp = ncp;
+ if (! (ncp = ncp->nc_shadowed))
+ break;
+ KKASSERT(ncp != startncp);
+ /*
+ * The individual lock was used just to protect the transition.
+ * Now that we safely know who's next, unlock the entry
+ * and move on.
+ */
+ cache_unlock_one(oncp);
+ }
+}
+
+static void
+cache_lock_one(struct namecache *ncp)
{
thread_t td;
int didwarn;
@@ -398,12 +488,27 @@ cache_lock_nonblock(struct namecache *nc
cache_lock_nonblock(struct namecache *ncp)
{
thread_t td;
-
+ struct namecache *oncp;
+#ifdef INVARIANTS
+ struct namecache *startncp = ncp;
+#endif
+
+ td = curthread;
+
+step_one:
KKASSERT(ncp->nc_refs != 0);
- td = curthread;
if (ncp->nc_exlocks == 0) {
ncp->nc_exlocks = 1;
ncp->nc_locktd = td;
+
+ if (ncp->nc_shadowed) {
+ oncp = ncp;
+ ncp = ncp->nc_shadowed;
+ KKASSERT(startncp != ncp);
+ cache_unlock_one(oncp);
+ goto step_one;
+ }
+
/*
* The vp associated with a locked ncp must be held
* to prevent it from being recycled (which would
@@ -422,6 +527,12 @@ void
void
cache_unlock(struct namecache *ncp)
{
+ cache_unlock_one(_cache_grphead_l(ncp));
+}
+
+static void
+cache_unlock_one(struct namecache *ncp)
+{
thread_t td = curthread;
KKASSERT(ncp->nc_refs > 0);
@@ -452,13 +563,17 @@ int
int
cache_get_nonblock(struct namecache *ncp)
{
+ int error;
+
/* XXX MP */
- if (ncp->nc_exlocks == 0 || ncp->nc_locktd == curthread) {
- _cache_hold(ncp);
- cache_lock(ncp);
- return(0);
- }
- return(EWOULDBLOCK);
+ _cache_hold(ncp);
+ /*
+ * We can't easily test whether locking would block,
+ * so we just try to take the lock.
+ */
+ if ((error = cache_lock_nonblock(ncp)))
+ _cache_drop(ncp);
+ return(error);
}
void
@@ -469,6 +584,61 @@ cache_put(struct namecache *ncp)
}
/*
+ * Join ncp into the shadow group of sncp.
+ *
+ * Both entries must be locked on entry. The caller also has to hold a
+ * dedicated reference to sncp.
+ *
+ * The routine fails and returns ELOOP if the intended shadowing
+ * association would yield a loop in the shadow chain.
+ *
+ * - On success ncp will be a representative of the joint shadow group, which
+ * then will be locked.
+ * - On failure the namecache entries will exist separately just as they did
+ * before, in the same state.
+ */
+int
+cache_shadow_attach(struct namecache *ncp, struct namecache *sncp)
+{
+ KKASSERT(! ncp->nc_shadowed);
+ KKASSERT(! ncp->nc_vp);
+ KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
+
+ if (_cache_grphead_l(sncp) == ncp) {
+ /* ncp->nc_error = ELOOP; */
+ return(ELOOP);
+ }
+
+ ncp->nc_shadowed = sncp;
+ cache_unlock_one(ncp);
+ return(0);
+}
+
+/*
+ * Take a namecache entry out of its shadow group.
+ *
+ * ncp must really shadow someone, and the shadow group must be locked
+ * upon entry.
+ *
+ * After the routine returns, ncp will be the head of a new (possibly singleton)
+ * shadow group. The routine returns the former successor of ncp in the original
+ * shadow group in a locked+ref'd state.
+ */
+struct namecache *
+cache_shadow_detach(struct namecache *ncp)
+{
+ struct namecache *sncp = ncp->nc_shadowed;
+
+ KKASSERT(sncp);
+
+ cache_lock_one(ncp);
+ ncp->nc_shadowed = NULL;
+ cache_setunresolved(ncp);
+
+ return(sncp);
+}
+
+/*
* Resolve an unresolved ncp by associating a vnode with it. If the
* vnode is NULL, a negative cache entry is created.
*
@@ -477,6 +647,8 @@ void
void
cache_setvp(struct namecache *ncp, struct vnode *vp)
{
+ ncp = _cache_grphead_l(ncp);
+
KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
ncp->nc_vp = vp;
if (vp != NULL) {
@@ -517,6 +689,8 @@ void
void
cache_settimeout(struct namecache *ncp, int nticks)
{
+ ncp = _cache_grphead_l(ncp);
+
if ((ncp->nc_timeout = ticks + nticks) == 0)
ncp->nc_timeout = 1;
}
@@ -542,6 +716,8 @@ cache_setunresolved(struct namecache *nc
cache_setunresolved(struct namecache *ncp)
{
struct vnode *vp;
+
+ ncp = _cache_grphead_l(ncp);
if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
ncp->nc_flag |= NCF_UNRESOLVED;
@@ -619,11 +795,11 @@ cache_inval(struct namecache *ncp, int f
struct namecache *nextkid;
int rcnt = 0;
- KKASSERT(ncp->nc_exlocks);
+ KKASSERT(_cache_grphead_l(ncp)->nc_exlocks);
cache_setunresolved(ncp);
if (flags & CINV_DESTROY)
- ncp->nc_flag |= NCF_DESTROYED;
+ _cache_grphead_l(ncp)->nc_flag |= NCF_DESTROYED;
if ((flags & CINV_CHILDREN) &&
(kid = TAILQ_FIRST(&ncp->nc_list)) != NULL
@@ -634,7 +810,8 @@ cache_inval(struct namecache *ncp, int f
if ((nextkid = TAILQ_NEXT(kid, nc_entry)) != NULL)
cache_hold(nextkid);
if ((kid->nc_flag & NCF_UNRESOLVED) == 0 ||
- TAILQ_FIRST(&kid->nc_list)
+ TAILQ_FIRST(&kid->nc_list) ||
+ kid->nc_shadowed
) {
cache_lock(kid);
rcnt += cache_inval(kid, flags & ~CINV_DESTROY);
@@ -650,7 +827,7 @@ cache_inval(struct namecache *ncp, int f
* Someone could have gotten in there while ncp was unlocked,
* retry if so.
*/
- if ((ncp->nc_flag & NCF_UNRESOLVED) == 0)
+ if ((_cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) == 0)
++rcnt;
return (rcnt);
}
@@ -774,6 +951,8 @@ cache_vget(struct namecache *ncp, struct
struct vnode *vp;
int error;
+ ncp = _cache_grphead(ncp);
+
again:
vp = NULL;
if (ncp->nc_flag & NCF_UNRESOLVED) {
@@ -805,6 +984,8 @@ cache_vref(struct namecache *ncp, struct
{
struct vnode *vp;
int error;
+
+ ncp = _cache_grphead(ncp);
again:
vp = NULL;
@@ -1121,9 +1302,10 @@ again:
goto again;
}
if (rncp) {
+ struct namecache *srncp = _cache_grphead_l(rncp);
vrele(pvp);
- if (rncp->nc_flag & NCF_UNRESOLVED) {
- cache_setvp(rncp, dvp);
+ if (srncp->nc_flag & NCF_UNRESOLVED) {
+ cache_setvp(srncp, dvp);
if (ncvp_debug >= 2) {
printf("cache_inefficient_scan: setvp %s/%s = %p\n",
ncp->nc_name, rncp->nc_name, dvp);
@@ -1132,11 +1314,11 @@ again:
if (ncvp_debug >= 2) {
printf("cache_inefficient_scan: setvp %s/%s already set %p/%p\n",
ncp->nc_name, rncp->nc_name, dvp,
- rncp->nc_vp);
+ srncp->nc_vp);
}
}
- if (rncp->nc_vp == NULL)
- error = rncp->nc_error;
+ if (srncp->nc_vp == NULL)
+ error = srncp->nc_error;
cache_put(rncp);
} else {
printf("cache_inefficient_scan: dvp %p NOT FOUND in %s\n",
@@ -1179,7 +1361,7 @@ cache_zap(struct namecache *ncp)
* We only scrap unref'd (other then our ref) unresolved entries,
* we do not scrap 'live' entries.
*/
- while (ncp->nc_flag & NCF_UNRESOLVED) {
+ while (_cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
/*
* Someone other then us has a ref, stop.
*/
@@ -1206,6 +1388,9 @@ cache_zap(struct namecache *ncp)
if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
vdrop(par->nc_vp);
}
+
+ if (ncp->nc_shadowed)
+ cache_put(cache_shadow_detach(ncp));
/*
* ncp should not have picked up any refs. Physically
@@ -1303,6 +1488,7 @@ cache_nlookup(struct namecache *par, str
cache_nlookup(struct namecache *par, struct nlcomponent *nlc)
{
struct namecache *ncp;
+ struct namecache *sncp;
struct namecache *new_ncp;
struct nchashhead *nchpp;
u_int32_t hash;
@@ -1319,15 +1505,16 @@ cache_nlookup(struct namecache *par, str
new_ncp = NULL;
restart:
LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+ sncp = _cache_grphead(ncp);
numchecks++;
/*
* Zap entries that have timed out.
*/
- if (ncp->nc_timeout &&
- (int)(ncp->nc_timeout - ticks) < 0 &&
- (ncp->nc_flag & NCF_UNRESOLVED) == 0 &&
- ncp->nc_exlocks == 0
+ if (sncp->nc_timeout &&
+ (int)(sncp->nc_timeout - ticks) < 0 &&
+ (sncp->nc_flag & NCF_UNRESOLVED) == 0 &&
+ sncp->nc_exlocks == 0
) {
cache_zap(cache_get(ncp));
goto restart;
@@ -1341,7 +1528,7 @@ restart:
if (ncp->nc_parent == par &&
ncp->nc_nlen == nlc->nlc_namelen &&
bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0 &&
- (ncp->nc_flag & NCF_DESTROYED) == 0
+ (sncp->nc_flag & NCF_DESTROYED) == 0
) {
if (cache_get_nonblock(ncp) == 0) {
if (new_ncp)
@@ -1414,15 +1601,15 @@ int
int
cache_resolve(struct namecache *ncp, struct ucred *cred)
{
- struct namecache *par;
+ struct namecache *par, *sncp;
int error;
restart:
/*
* If the ncp is already resolved we have nothing to do.
*/
- if ((ncp->nc_flag & NCF_UNRESOLVED) == 0)
- return (ncp->nc_error);
+ if ((_cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) == 0)
+ return (_cache_grphead_l(ncp)->nc_error);
/*
* Mount points need special handling because the parent does not
@@ -1461,17 +1648,20 @@ restart:
* not occur all that often, or if it does not have to go back too
* many nodes to resolve the ncp.
*/
- while (ncp->nc_parent->nc_vp == NULL) {
+ while (_cache_grphead(ncp->nc_parent)->nc_vp == NULL) {
+ struct namecache *spar;
+
/*
* This case can occur if a process is CD'd into a
* directory which is then rmdir'd. If the parent is marked
* destroyed there is no point trying to resolve it.
*/
- if (ncp->nc_parent->nc_flag & NCF_DESTROYED)
+ if (_cache_grphead(ncp->nc_parent)->nc_flag & NCF_DESTROYED)
return(ENOENT);
par = ncp->nc_parent;
- while (par->nc_parent && par->nc_parent->nc_vp == NULL)
+ while (par->nc_parent &&
+ _cache_grphead(par->nc_parent)->nc_vp == NULL)
par = par->nc_parent;
if (par->nc_parent == NULL) {
printf("EXDEV case 2 %*.*s\n",
@@ -1488,20 +1678,23 @@ restart:
* will handle any moves.
*/
cache_get(par);
+ spar = _cache_grphead_l(par);
if (par->nc_flag & NCF_MOUNTPT) {
cache_resolve_mp(par);
- } else if (par->nc_parent->nc_vp == NULL) {
+ } else if (_cache_grphead(par->nc_parent)->nc_vp == NULL) {
printf("[diagnostic] cache_resolve: raced on %*.*s\n", par->nc_nlen, par->nc_nlen, par->nc_name);
cache_put(par);
continue;
- } else if (par->nc_flag & NCF_UNRESOLVED) {
- par->nc_error = VOP_NRESOLVE(par, cred);
- }
- if ((error = par->nc_error) != 0) {
- if (par->nc_error != EAGAIN) {
+ } else if (spar->nc_flag & NCF_UNRESOLVED) {
+ error = VOP_NRESOLVE(par, cred);
+ spar = _cache_grphead_l(par);
+ spar->nc_error = error;
+ }
+ if ((error = spar->nc_error) != 0) {
+ if (spar->nc_error != EAGAIN) {
printf("EXDEV case 3 %*.*s error %d\n",
par->nc_nlen, par->nc_nlen, par->nc_name,
- par->nc_error);
+ spar->nc_error);
cache_put(par);
return(error);
}
@@ -1521,14 +1714,16 @@ restart:
* ncp must already be resolved.
*/
KKASSERT((ncp->nc_flag & NCF_MOUNTPT) == 0);
- ncp->nc_error = VOP_NRESOLVE(ncp, cred);
- /*vop_nresolve(*ncp->nc_parent->nc_vp->v_ops, ncp, cred);*/
- if (ncp->nc_error == EAGAIN) {
+ error = VOP_NRESOLVE(ncp, cred);
+ sncp = _cache_grphead_l(ncp);
+ sncp->nc_error = error;
+ /*vop_nresolve(*_cache_grphead_unlocked(ncp->nc_parent)->nc_vp->v_ops, ncp, cred);*/
+ if (error == EAGAIN) {
printf("[diagnostic] cache_resolve: EAGAIN ncp %p %*.*s\n",
ncp, ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
goto restart;
}
- return(ncp->nc_error);
+ return(error);
}
/*
@@ -1549,6 +1744,8 @@ cache_resolve_mp(struct namecache *ncp)
struct vnode *vp;
struct mount *mp = ncp->nc_mount;
int error;
+
+ ncp = _cache_grphead_l(ncp);
KKASSERT(mp != NULL);
if (ncp->nc_flag & NCF_UNRESOLVED) {
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_default.c
--- a/sys/kern/vfs_default.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_default.c Mon Jan 23 05:16:56 2006 +0100
@@ -203,7 +203,7 @@ vop_compat_nresolve(struct vop_nresolve_
return(EPERM);
if (ncp->nc_parent == NULL)
return(EPERM);
- if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+ if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
/*
@@ -234,7 +234,7 @@ vop_compat_nresolve(struct vop_nresolve_
VOP_UNLOCK(vp, 0, curthread);
if ((cnp.cn_flags & CNP_PDIRUNLOCK) == 0)
VOP_UNLOCK(dvp, 0, curthread);
- if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
+ if ((cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) == 0) {
/* was resolved by another process while we were unlocked */
if (error == 0)
vrele(vp);
@@ -245,7 +245,7 @@ vop_compat_nresolve(struct vop_nresolve_
} else if (error == ENOENT) {
KKASSERT(vp == NULL);
if (cnp.cn_flags & CNP_ISWHITEOUT)
- ncp->nc_flag |= NCF_WHITEOUT;
+ cache_grphead_l(ncp)->nc_flag |= NCF_WHITEOUT;
cache_setvp(ncp, NULL);
}
vrele(dvp);
@@ -338,7 +338,7 @@ vop_compat_ncreate(struct vop_ncreate_ar
return(EPERM);
if (ncp->nc_parent == NULL)
return(EPERM);
- if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+ if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -421,7 +421,7 @@ vop_compat_nmkdir(struct vop_nmkdir_args
return(EPERM);
if (ncp->nc_parent == NULL)
return(EPERM);
- if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+ if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -504,7 +504,7 @@ vop_compat_nmknod(struct vop_nmknod_args
return(EPERM);
if (ncp->nc_parent == NULL)
return(EPERM);
- if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+ if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -586,7 +586,7 @@ vop_compat_nlink(struct vop_nlink_args *
return(EPERM);
if (ncp->nc_parent == NULL)
return(EPERM);
- if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+ if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -660,7 +660,7 @@ vop_compat_nsymlink(struct vop_nsymlink_
return(EPERM);
if (ncp->nc_parent == NULL)
return(EPERM);
- if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+ if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -746,7 +746,7 @@ vop_compat_nwhiteout(struct vop_nwhiteou
return(EPERM);
if (ncp->nc_parent == NULL)
return(EPERM);
- if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+ if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -836,7 +836,7 @@ vop_compat_nremove(struct vop_nremove_ar
return(EPERM);
if (ncp->nc_parent == NULL)
return(EPERM);
- if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+ if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -912,7 +912,7 @@ vop_compat_nrmdir(struct vop_nrmdir_args
return(EPERM);
if (ncp->nc_parent == NULL)
return(EPERM);
- if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+ if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -1005,7 +1005,7 @@ vop_compat_nrename(struct vop_nrename_ar
return(EPERM);
if (fncp->nc_parent == NULL)
return(EPERM);
- if ((fdvp = fncp->nc_parent->nc_vp) == NULL)
+ if ((fdvp = cache_grphead(fncp->nc_parent)->nc_vp) == NULL)
return(EPERM);
/*
@@ -1064,7 +1064,7 @@ vop_compat_nrename(struct vop_nrename_ar
error = EPERM;
if (tncp->nc_parent == NULL)
error = EPERM;
- if ((tdvp = tncp->nc_parent->nc_vp) == NULL)
+ if ((tdvp = cache_grphead(tncp->nc_parent)->nc_vp) == NULL)
error = EPERM;
if (error) {
vrele(fdvp);
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_journal.c
--- a/sys/kern/vfs_journal.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_journal.c Mon Jan 23 05:16:56 2006 +0100
@@ -1824,7 +1824,7 @@ jrecord_write_vnode_ref(struct jrecord *
struct namecache *ncp;
TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
- if ((ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
+ if ((cache_grphead(ncp)->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
break;
}
if (ncp)
@@ -1840,7 +1840,7 @@ jrecord_write_vnode_link(struct jrecord
TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
if (ncp == notncp)
continue;
- if ((ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
+ if ((cache_grphead(ncp)->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
break;
}
if (ncp)
@@ -2533,7 +2533,7 @@ journal_nremove(struct vop_nremove_args
mp = ap->a_head.a_ops->vv_mount;
if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_REMOVE) &&
- ap->a_ncp->nc_vp
+ cache_grphead(ap->a_ncp)->nc_vp
) {
jreclist_undo_file(&jreclist, ap->a_ncp->nc_vp,
JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
@@ -2599,7 +2599,7 @@ journal_nrmdir(struct vop_nrmdir_args *a
mp = ap->a_head.a_ops->vv_mount;
if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RMDIR)) {
- jreclist_undo_file(&jreclist, ap->a_ncp->nc_vp,
+ jreclist_undo_file(&jreclist, cache_grphead(ap->a_ncp)->nc_vp,
JRUNDO_VATTR|JRUNDO_GETVP, 0, 0);
}
error = vop_journal_operate_ap(&ap->a_head);
@@ -2628,9 +2628,9 @@ journal_nrename(struct vop_nrename_args
mp = ap->a_head.a_ops->vv_mount;
if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RENAME) &&
- ap->a_tncp->nc_vp
+ cache_grphead(ap->a_tncp)->nc_vp
) {
- jreclist_undo_file(&jreclist, ap->a_tncp->nc_vp,
+ jreclist_undo_file(&jreclist, cache_grphead(ap->a_tncp)->nc_vp,
JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
}
error = vop_journal_operate_ap(&ap->a_head);
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_nlookup.c
--- a/sys/kern/vfs_nlookup.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_nlookup.c Mon Jan 23 05:16:56 2006 +0100
@@ -381,13 +381,15 @@ nlookup(struct nlookupdata *nd)
ncp = cache_get(ncp);
} else {
while ((ncp->nc_flag & NCF_MOUNTPT) && ncp != nd->nl_rootncp) {
- if (ncp->nc_parent->nc_flag & NCF_DESTROYED)
+ if (cache_grphead(ncp->nc_parent)->nc_flag &
+ NCF_DESTROYED)
break;
ncp = ncp->nc_parent; /* get to underlying node */
KKASSERT(ncp != NULL && 1);
}
if (ncp != nd->nl_rootncp) {
- if (ncp->nc_parent->nc_flag & NCF_DESTROYED) {
+ if (cache_grphead(ncp->nc_parent)->nc_flag &
+ NCF_DESTROYED) {
error = EINVAL;
break;
}
@@ -421,11 +423,11 @@ nlookup(struct nlookupdata *nd)
* XXX neither '.' nor '..' should return EAGAIN since they were
* previously resolved and thus cannot be newly created ncp's.
*/
- if (ncp->nc_flag & NCF_UNRESOLVED) {
+ if (cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
error = cache_resolve(ncp, nd->nl_cred);
KKASSERT(error != EAGAIN);
} else {
- error = ncp->nc_error;
+ error = cache_grphead_l(ncp)->nc_error;
}
/*
@@ -459,7 +461,7 @@ nlookup(struct nlookupdata *nd)
* element or it is the last element and we are allowed to
* follow symlinks, resolve the symlink.
*/
- if ((ncp->nc_flag & NCF_ISSYMLINK) &&
+ if ((cache_grphead_l(ncp)->nc_flag & NCF_ISSYMLINK) &&
(*ptr || (nd->nl_flags & NLC_FOLLOW))
) {
if (nd->nl_loopcnt++ >= MAXSYMLINKS) {
@@ -509,24 +511,31 @@ nlookup(struct nlookupdata *nd)
*
* XXX NOCROSSMOUNT
*/
- while ((ncp->nc_flag & NCF_ISDIR) && ncp->nc_vp->v_mountedhere &&
+ while ((cache_grphead_l(ncp)->nc_flag & NCF_ISDIR) &&
+ cache_grphead_l(ncp)->nc_vp->v_mountedhere &&
(nd->nl_flags & NLC_NOCROSSMOUNT) == 0
) {
struct mount *mp;
struct vnode *tdp;
- mp = ncp->nc_vp->v_mountedhere;
+ mp = cache_grphead_l(ncp)->nc_vp->v_mountedhere;
cache_put(ncp);
ncp = cache_get(mp->mnt_ncp);
- if (ncp->nc_flag & NCF_UNRESOLVED) {
+ if (cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
while (vfs_busy(mp, 0, nd->nl_td))
;
error = VFS_ROOT(mp, &tdp);
vfs_unbusy(mp, nd->nl_td);
if (error)
break;
- cache_setvp(ncp, tdp);
+ /*
+ * The VFS_ROOT call might have the side effect of
+ * resolving the ncp. Or is that declared a sin
+ * anywhere?
+ */
+ if (cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED)
+ cache_setvp(ncp, tdp);
vput(tdp);
}
}
@@ -543,7 +552,7 @@ nlookup(struct nlookupdata *nd)
* to the failure case below.
*/
while (*ptr == '/') {
- if ((ncp->nc_flag & NCF_ISDIR) == 0 &&
+ if ((cache_grphead_l(ncp)->nc_flag & NCF_ISDIR) == 0 &&
!(nd->nl_flags & NLC_WILLBEDIR)
) {
break;
@@ -555,7 +564,7 @@ nlookup(struct nlookupdata *nd)
* Continuation case: additional elements and the current
* element is a directory.
*/
- if (*ptr && (ncp->nc_flag & NCF_ISDIR)) {
+ if (*ptr && (cache_grphead_l(ncp)->nc_flag & NCF_ISDIR)) {
cache_drop(nd->nl_ncp);
cache_unlock(ncp);
nd->nl_ncp = ncp;
@@ -619,7 +628,7 @@ nlookup_mp(struct mount *mp, struct name
error = 0;
ncp = mp->mnt_ncp;
cache_get(ncp);
- if (ncp->nc_flag & NCF_UNRESOLVED) {
+ if (cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
while (vfs_busy(mp, 0, curthread))
;
error = VFS_ROOT(mp, &vp);
@@ -655,7 +664,7 @@ nreadsymlink(struct nlookupdata *nd, str
nlc->nlc_nameptr = NULL;
nlc->nlc_namelen = 0;
- if (ncp->nc_vp == NULL)
+ if (cache_grphead_l(ncp)->nc_vp == NULL)
return(ENOENT);
if ((error = cache_vget(ncp, nd->nl_cred, LK_SHARED, &vp)) != 0)
return(error);
@@ -713,13 +722,14 @@ int
int
naccess(struct namecache *ncp, int vmode, struct ucred *cred)
{
- struct namecache *par;
+ struct namecache *par, *oncp = ncp;
struct vnode *vp;
struct vattr va;
int error;
- if (ncp->nc_flag & NCF_UNRESOLVED) {
+ if (ncp->nc_flag & NCF_UNRESOLVED || ncp->nc_shadowed) {
cache_lock(ncp);
+ ncp = cache_grphead_l(ncp);
cache_resolve(ncp, cred);
cache_unlock(ncp);
}
@@ -728,7 +738,7 @@ naccess(struct namecache *ncp, int vmode
if (((vmode & VCREATE) && ncp->nc_vp == NULL) ||
((vmode & VDELETE) && ncp->nc_vp != NULL)
) {
- if ((par = ncp->nc_parent) == NULL) {
+ if ((par = oncp->nc_parent) == NULL) {
if (error != EAGAIN)
error = EINVAL;
} else {
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_syscalls.c
--- a/sys/kern/vfs_syscalls.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_syscalls.c Mon Jan 23 05:16:56 2006 +0100
@@ -140,7 +140,7 @@ mount(struct mount_args *uap)
error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
if (error == 0) {
if ((error = nlookup(&nd)) == 0) {
- if (nd.nl_ncp->nc_vp == NULL)
+ if (cache_grphead_l(nd.nl_ncp)->nc_vp == NULL)
error = ENOENT;
}
}
@@ -159,7 +159,7 @@ mount(struct mount_args *uap)
/*
* now we have the locked ref'd ncp and unreferenced vnode.
*/
- vp = ncp->nc_vp;
+ vp = cache_grphead_l(ncp)->nc_vp;
if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) {
cache_put(ncp);
return (error);
@@ -1050,9 +1050,9 @@ fchdir(struct fchdir_args *uap)
while (!error && (mp = vp->v_mountedhere) != NULL) {
error = nlookup_mp(mp, &nct);
if (error == 0) {
+ vput(vp);
+ vp = cache_grphead_l(nct)->nc_vp;
cache_unlock(nct); /* leave ref intact */
- vput(vp);
- vp = nct->nc_vp;
error = vget(vp, LK_SHARED, td);
KKASSERT(error == 0);
cache_drop(ncp);
@@ -1086,7 +1086,7 @@ kern_chdir(struct nlookupdata *nd)
if ((error = nlookup(nd)) != 0)
return (error);
- if ((vp = nd->nl_ncp->nc_vp) == NULL)
+ if ((vp = cache_grphead_l(nd->nl_ncp)->nc_vp) == NULL)
return (ENOENT);
if ((error = vget(vp, LK_SHARED, td)) != 0)
return (error);
@@ -1192,7 +1192,7 @@ kern_chroot(struct namecache *ncp)
if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
return (error);
}
- if ((vp = ncp->nc_vp) == NULL)
+ if ((vp = cache_grphead_l(ncp)->nc_vp) == NULL)
return (ENOENT);
if ((error = vget(vp, LK_SHARED, td)) != 0)
@@ -1464,7 +1464,7 @@ kern_mknod(struct nlookupdata *nd, int m
if ((error = nlookup(nd)) != 0)
return (error);
ncp = nd->nl_ncp;
- if (ncp->nc_vp)
+ if (cache_grphead_l(ncp)->nc_vp)
return (EEXIST);
VATTR_NULL(&vattr);
@@ -1536,7 +1536,7 @@ kern_mkfifo(struct nlookupdata *nd, int
if ((error = nlookup(nd)) != 0)
return (error);
ncp = nd->nl_ncp;
- if (ncp->nc_vp)
+ if (cache_grphead_l(ncp)->nc_vp)
return (EEXIST);
VATTR_NULL(&vattr);
@@ -1633,7 +1633,7 @@ kern_link(struct nlookupdata *nd, struct
bwillwrite();
if ((error = nlookup(nd)) != 0)
return (error);
- vp = nd->nl_ncp->nc_vp;
+ vp = cache_grphead_l(nd->nl_ncp)->nc_vp;
KKASSERT(vp != NULL);
if (vp->v_type == VDIR)
return (EPERM); /* POSIX */
@@ -1654,7 +1654,7 @@ kern_link(struct nlookupdata *nd, struct
vput(vp);
return (error);
}
- if (linknd->nl_ncp->nc_vp) {
+ if (cache_grphead_l(linknd->nl_ncp)->nc_vp) {
vput(vp);
return (EEXIST);
}
@@ -1704,7 +1704,7 @@ kern_symlink(struct nlookupdata *nd, cha
if ((error = nlookup(nd)) != 0)
return (error);
ncp = nd->nl_ncp;
- if (ncp->nc_vp)
+ if (cache_grphead_l(ncp)->nc_vp)
return (EEXIST);
VATTR_NULL(&vattr);
@@ -1922,7 +1922,7 @@ kern_stat(struct nlookupdata *nd, struct
if ((error = nlookup(nd)) != 0)
return (error);
again:
- if ((vp = nd->nl_ncp->nc_vp) == NULL)
+ if ((vp = cache_grphead_l(nd->nl_ncp)->nc_vp) == NULL)
return (ENOENT);
td = curthread;
@@ -2718,13 +2718,17 @@ kern_rename(struct nlookupdata *fromnd,
* Due to relocking of the source, fromnd->nl_ncp->nc_vp might have
* become NULL.
*/
- if (tond->nl_ncp->nc_vp) {
- if (fromnd->nl_ncp->nc_vp == NULL) {
+ /*
+ * XXX I was too lazy to find out exactly who is locked, so we just
+ * dumbly cache_grphead() all the parties...
+ */
+ if (cache_grphead(tond->nl_ncp)->nc_vp) {
+ if (cache_grphead(fromnd->nl_ncp)->nc_vp == NULL) {
error = ENOENT;
- } else if (fromnd->nl_ncp->nc_vp->v_type == VDIR) {
- if (tond->nl_ncp->nc_vp->v_type != VDIR)
+ } else if (cache_grphead(fromnd->nl_ncp)->nc_vp->v_type == VDIR) {
+ if (cache_grphead(tond->nl_ncp)->nc_vp->v_type != VDIR)
error = ENOTDIR;
- } else if (tond->nl_ncp->nc_vp->v_type == VDIR) {
+ } else if (cache_grphead(tond->nl_ncp)->nc_vp->v_type == VDIR) {
error = EISDIR;
}
}
@@ -2753,7 +2757,7 @@ kern_rename(struct nlookupdata *fromnd,
* when we detect the situation.
*/
if (error == 0) {
- if (fromnd->nl_ncp->nc_vp == tond->nl_ncp->nc_vp) {
+ if (cache_grphead(fromnd->nl_ncp)->nc_vp == cache_grphead(tond->nl_ncp)->nc_vp) {
error = VOP_NREMOVE(fromnd->nl_ncp, fromnd->nl_cred);
} else {
error = VOP_NRENAME(fromnd->nl_ncp, tond->nl_ncp,
@@ -2802,7 +2806,7 @@ kern_mkdir(struct nlookupdata *nd, int m
return (error);
ncp = nd->nl_ncp;
- if (ncp->nc_vp)
+ if (cache_grphead_l(ncp)->nc_vp)
return (EEXIST);
VATTR_NULL(&vattr);
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_vnops.c
--- a/sys/kern/vfs_vnops.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_vnops.c Mon Jan 23 05:16:56 2006 +0100
@@ -169,7 +169,7 @@ vn_open(struct nlookupdata *nd, struct f
*/
again:
if (fmode & O_CREAT) {
- if (ncp->nc_vp == NULL) {
+ if (cache_grphead_l(ncp)->nc_vp == NULL) {
VATTR_NULL(vap);
vap->va_type = VREG;
vap->va_mode = cmode;
diff -r 820a1f1d791e -r 6db92777ea99 sys/sys/namecache.h
--- a/sys/sys/namecache.h Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/sys/namecache.h Mon Jan 23 05:16:56 2006 +0100
@@ -102,6 +102,7 @@ struct namecache {
TAILQ_ENTRY(namecache) nc_vnode; /* scan via vnode->v_namecache */
struct namecache_list nc_list; /* list of children */
struct namecache *nc_parent; /* namecache entry for parent */
+ struct namecache *nc_shadowed; /* lower layer entry in layered fs */
struct vnode *nc_vp; /* vnode representing name or NULL */
int nc_refs; /* ref count prevents deletion */
u_short nc_flag;
@@ -150,6 +151,10 @@ void cache_lock(struct namecache *ncp);
void cache_lock(struct namecache *ncp);
int cache_lock_nonblock(struct namecache *ncp);
void cache_unlock(struct namecache *ncp);
+struct namecache *cache_grphead_l(struct namecache *ncp);
+struct namecache *cache_grphead(struct namecache *ncp);
+int cache_shadow_attach(struct namecache *ncp, struct namecache *sncp);
+struct namecache *cache_shadow_detach(struct namecache *ncp);
void cache_setvp(struct namecache *ncp, struct vnode *vp);
void cache_settimeout(struct namecache *ncp, int nticks);
void cache_setunresolved(struct namecache *ncp);
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nfs/nfs_serv.c
--- a/sys/vfs/nfs/nfs_serv.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nfs/nfs_serv.c Mon Jan 23 05:16:56 2006 +0100
@@ -2183,8 +2183,8 @@ nfsrv_rename(struct nfsrv_descript *nfsd
}
fromnd.nl_flags |= NLC_NCPISLOCKED;
- tvp = tond.nl_ncp->nc_vp;
- fvp = fromnd.nl_ncp->nc_vp;
+ tvp = cache_grphead_l(tond.nl_ncp)->nc_vp;
+ fvp = cache_grphead_l(fromnd.nl_ncp)->nc_vp;
if (tvp != NULL) {
if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nfs/nfs_subs.c
--- a/sys/vfs/nfs/nfs_subs.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nfs/nfs_subs.c Mon Jan 23 05:16:56 2006 +0100
@@ -1671,7 +1671,7 @@ nfs_namei(struct nlookupdata *nd, struct
error = ENXIO;
}
}
- if (vpp && ncp->nc_vp) {
+ if (vpp && cache_grphead_l(ncp)->nc_vp) {
error = cache_vget(ncp, nd->nl_cred, LK_EXCLUSIVE, vpp);
}
if (error) {
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nfs/nfs_vnops.c
--- a/sys/vfs/nfs/nfs_vnops.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nfs/nfs_vnops.c Mon Jan 23 05:16:56 2006 +0100
@@ -883,8 +883,8 @@ nfs_nresolve(struct vop_nresolve_args *a
cred = ap->a_cred;
ncp = ap->a_ncp;
- KKASSERT(ncp->nc_parent && ncp->nc_parent->nc_vp);
- dvp = ncp->nc_parent->nc_vp;
+ KKASSERT(ncp->nc_parent && cache_grphead(ncp->nc_parent)->nc_vp);
+ dvp = cache_grphead(ncp->nc_parent)->nc_vp;
if ((error = vget(dvp, LK_SHARED, td)) != 0)
return (error);
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nullfs/null.h
--- a/sys/vfs/nullfs/null.h Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nullfs/null.h Mon Jan 23 05:16:56 2006 +0100
@@ -43,18 +43,19 @@ struct null_args {
char *target; /* Target of loopback */
};
-struct null_mount {
- struct mount *nullm_vfs;
- struct vnode *nullm_rootvp; /* Reference to root null_node */
-};
-
#ifdef _KERNEL
-#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
#ifdef NULLFS_DEBUG
-#define NULLFSDEBUG(format, args...) printf(format ,## args)
+#define NULLFSDEBUG(format, args...) \
+ printf("[nullfs] %s:%d: " format, __func__, __LINE__, ## args)
+#define NULLNCDEBUG(ncp) \
+ NULLFSDEBUG(#ncp " %p: name %s, refs %d, exlocks %d, " \
+ "nc_mount %p, nc_shadowed %p\n", \
+ (ncp), (ncp)->nc_name, (ncp)->nc_refs, (ncp)->nc_exlocks, \
+ (ncp)->nc_mount, (ncp)->nc_shadowed);
#else
#define NULLFSDEBUG(format, args...)
+#define NULLNCDEBUG(ncp)
#endif /* NULLFS_DEBUG */
#endif /* _KERNEL */
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nullfs/null_vfsops.c
--- a/sys/vfs/nullfs/null_vfsops.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nullfs/null_vfsops.c Mon Jan 23 05:16:56 2006 +0100
@@ -80,12 +80,10 @@ nullfs_mount(struct mount *mp, char *pat
{
int error = 0;
struct null_args args;
- struct vnode *rootvp;
- struct null_mount *xmp;
u_int size;
struct nlookupdata nd;
- NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
+ NULLFSDEBUG("mp %p\n", (void *)mp);
/*
* Update is a no-op
@@ -98,118 +96,118 @@ nullfs_mount(struct mount *mp, char *pat
* Get argument
*/
error = copyin(data, (caddr_t)&args, sizeof(struct null_args));
- if (error)
- return (error);
-
- /*
- * Find lower node
- */
- rootvp = NULL;
- error = nlookup_init(&nd, args.target, UIO_USERSPACE, NLC_FOLLOW);
+
+ /*
+ * Do a lookup just to see if things are not fundamentally broken...
+ * but it's too early to make proper use of the result.
+ */
+ if (error == 0)
+ error = nlookup_init(&nd, args.target, UIO_USERSPACE,
+ NLC_FOLLOW);
if (error == 0)
error = nlookup(&nd);
- if (error == 0) {
- error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE,
- &rootvp);
- }
-
- xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
- M_NULLFSMNT, M_WAITOK); /* XXX */
-
- /*
- * Save reference to underlying FS
- */
- /*
- * As lite stacking enters the scene, the old way of doing this
- * -- via the vnode -- is not good enough anymore...
- */
- xmp->nullm_vfs = nd.nl_ncp->nc_mount;
+ if (error)
+ return(error);
+
nlookup_done(&nd);
- vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops,
- null_vnodeop_entries, 0);
-
- VOP_UNLOCK(rootvp, 0, td);
-
- /*
- * Keep a held reference to the root vnode.
- * It is vrele'd in nullfs_unmount.
- */
- xmp->nullm_rootvp = rootvp;
- /*
- * XXX What's the proper safety condition for querying
- * the underlying mount? Is this flag tuning necessary
- * at all?
- */
- if (xmp->nullm_vfs->mnt_flag & MNT_LOCAL)
- mp->mnt_flag |= MNT_LOCAL;
- mp->mnt_data = (qaddr_t) xmp;
- vfs_getnewfsid(mp);
+ vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops, null_vnodeop_entries, 0);
+
+ /*
+ * Heck it, let it just be local. I bet it would take only five minutes to
+ * come up with a sound-sounding meaning for "local" by which null mounts
+ * are always local.
+ */
+ mp->mnt_flag |= MNT_LOCAL;
+ vfs_getnewfsid(mp);
(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
&size);
bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
- (void)nullfs_statfs(mp, &mp->mnt_stat, td);
- NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
- mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntfromname);
+ NULLFSDEBUG("lower %s, alias at %s\n",
+ mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
return (0);
}
-/*
- * Free reference to null layer
- */
static int
nullfs_unmount(struct mount *mp, int mntflags, struct thread *td)
{
- void *mntdata;
- int flags = 0;
-
- NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
-
- if (mntflags & MNT_FORCE)
- flags |= FORCECLOSE;
-
- /*
- * Finally, throw away the null_mount structure
- */
- mntdata = mp->mnt_data;
- mp->mnt_data = 0;
- free(mntdata, M_NULLFSMNT);
+ NULLFSDEBUG("mp %p\n", (void *)mp);
+
+ cache_lock(mp->mnt_ncp);
+ cache_put(cache_shadow_detach(mp->mnt_ncp));
+ cache_unlock(mp->mnt_ncp);
+
return 0;
}
static int
+nullfs_start(struct mount *mp, int flags, struct thread *td)
+{
+ int error;
+ struct nlookupdata nd;
+
+ NULLFSDEBUG("nlookup %s\n", mp->mnt_stat.f_mntfromname);
+
+ error = nlookup_init(&nd, mp->mnt_stat.f_mntfromname,
+ UIO_SYSSPACE, NLC_FOLLOW);
+ if (error == 0)
+ error = nlookup(&nd);
+ if (error)
+ return(error);
+
+ cache_hold(nd.nl_ncp);
+ cache_lock(mp->mnt_ncp);
+
+ error = mp->mnt_ncp->nc_shadowed ?
+ EINVAL :
+ cache_shadow_attach(mp->mnt_ncp, nd.nl_ncp);
+
+ nlookup_done(&nd);
+
+ NULLNCDEBUG(mp->mnt_ncp);
+#ifdef NULLFS_DEBUG
+ if (mp->mnt_ncp->nc_shadowed)
+ NULLNCDEBUG(mp->mnt_ncp->nc_shadowed);
+#endif
+
+ return (error);
+}
+
+/*
+ * As the mount won't get aborted if VFS_START fails, we have to check in each
+ * VFS call whether it has succeeded...
+ */
+
+static int
nullfs_root(struct mount *mp, struct vnode **vpp)
{
- struct thread *td = curthread; /* XXX */
- struct vnode *vp;
-
- NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", (void *)mp,
- (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);
-
- /*
- * Return locked reference to root.
- */
- vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
- vref(vp);
-
-#ifdef NULLFS_DEBUG
- if (VOP_ISLOCKED(vp, NULL)) {
- Debugger("root vnode is locked.\n");
- vrele(vp);
- return (EDEADLK);
- }
-#endif
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
- *vpp = vp;
- return 0;
+ int error;
+
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ error = cache_vget(mp->mnt_ncp, crhold(proc0.p_ucred),
+ LK_EXCLUSIVE | LK_RETRY, vpp);
+ crfree(proc0.p_ucred);
+
+ return (error);
+}
+
+static __inline
+struct mount *
+nullfs_lowermount_0(struct mount *mp)
+{
+ return (mp->mnt_ncp->nc_shadowed->nc_mount);
}
static int
nullfs_quotactl(struct mount *mp, int cmd, uid_t uid, caddr_t arg,
struct thread *td)
{
- return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, td);
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ return VFS_QUOTACTL(nullfs_lowermount_0(mp), cmd, uid, arg, td);
}
static int
@@ -218,12 +216,15 @@ nullfs_statfs(struct mount *mp, struct s
int error;
struct statfs mstat;
- NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p)\n", (void *)mp,
- (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ NULLFSDEBUG("mp %p, ncp %p, lower mp %p\n",
+ mp, mp->mnt_ncp, nullfs_lowermount_0(mp));
bzero(&mstat, sizeof(mstat));
- error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat, td);
+ error = VFS_STATFS(nullfs_lowermount_0(mp), &mstat, td);
if (error)
return (error);
@@ -248,23 +249,27 @@ nullfs_checkexp(struct mount *mp, struct
nullfs_checkexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
struct ucred **credanonp)
{
-
- return VFS_CHECKEXP(MOUNTTONULLMOUNT(mp)->nullm_vfs, nam,
- extflagsp, credanonp);
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ return VFS_CHECKEXP(nullfs_lowermount_0(mp), nam, extflagsp, credanonp);
}
static int
nullfs_extattrctl(struct mount *mp, int cmd, const char *attrname, caddr_t arg,
struct thread *td)
{
- return VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, attrname,
- arg, td);
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ return VFS_EXTATTRCTL(nullfs_lowermount_0(mp), cmd, attrname, arg, td);
}
static struct vfsops null_vfsops = {
.vfs_mount = nullfs_mount,
.vfs_unmount = nullfs_unmount,
+ .vfs_start = nullfs_start,
.vfs_root = nullfs_root,
.vfs_quotactl = nullfs_quotactl,
.vfs_statfs = nullfs_statfs,
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nullfs/null_vnops.c
--- a/sys/vfs/nullfs/null_vnops.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nullfs/null_vnops.c Mon Jan 23 05:16:56 2006 +0100
@@ -109,6 +109,8 @@
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
+#include <sys/namecache.h>
+#include <sys/nlookup.h>
#include "null.h"
static int null_nresolve(struct vop_nresolve_args *ap);
@@ -122,18 +124,93 @@ static int null_nrmdir(struct vop_nrmdir
static int null_nrmdir(struct vop_nrmdir_args *ap);
static int null_nrename(struct vop_nrename_args *ap);
+static __inline
+struct mount *
+nullfs_lowermount_l(struct namecache *ncp)
+{
+ /*
+ * The code in use below allows passing through lower mounts.
+ * If we didn't want to do that, we could use
+ *
+ * ncp->nc_mount->mnt_ncp->nc_shadowed->nc_mount
+ *
+ * Eventually, the choice might be configurable.
+ *
+ * - - -
+ *
+ * Matt says in
+ * http://leaf.dragonflybsd.org/mailarchive/kernel/2006-01/msg00023.html
+ * :
+
+ The ncp->nc_mount field was never meant to be used by the VFS code...
+ only to be used internally by cache_*(). It looks like I broke my own
+ rule... I have two references in NFS, but that's for later.
+
+ * Note that both approaches still use nc_mount:
+ *
+ * - If we wanna pass through lower mounts, we do have to find the
+ * lower fs ncp-wise; we simply have no choice.
+ *
+ * - If we just work with a fixed lower fs, we can still access it if
+ * we are willing to use nc_mount. Hence it seems pointless to keep
+ * around a direct reference to the lower fs, though that would of
+ * course be feasible.
+ */
+ return (ncp->nc_shadowed->nc_mount);
+}
+
static int
null_nresolve(struct vop_nresolve_args *ap)
{
- ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
-
- return vop_nresolve_ap(ap);
+ struct namecache *ncp = ap->a_ncp;
+ struct mount *lmp;
+
+ if (! ncp->nc_shadowed) {
+ struct nlcomponent nlc;
+ struct namecache *sncp;
+
+ nlc.nlc_nameptr = ncp->nc_name;
+ nlc.nlc_namelen = ncp->nc_nlen;
+
+ KKASSERT(ncp->nc_parent->nc_shadowed);
+ sncp = cache_nlookup(ncp->nc_parent->nc_shadowed, &nlc);
+
+ if ((ncp->nc_error = cache_shadow_attach(ncp, sncp))) {
+ cache_put(sncp);
+ cache_setvp(ncp, NULL);
+ return (ncp->nc_error);
+ }
+
+ NULLNCDEBUG(ncp);
+ NULLNCDEBUG(ncp->nc_shadowed);
+ }
+
+ /*
+ * XXX Querying/ensuring the usability of the lower fs is still not
+ * done right. As a quick hack, we do a simple test here, which will
+ * do for avoiding the most obvious fallacies.
+ */
+ if ((lmp = nullfs_lowermount_l(ncp)) &&
+ (ap->a_head.a_ops = lmp->mnt_vn_use_ops)) {
+ /*
+ * Moving down in the shadow chain avoids a recursive
+ * loop (which would end up exhausting the kernel stack).
+ *
+ * Otherwise it's the same whether we use ncp or
+ * ncp->nc_shadowed -- we go for group shared ncp attributes.
+ */
+ ap->a_ncp = ncp->nc_shadowed;
+ return vop_nresolve_ap(ap);
+ }
+
+ return (ENXIO);
}
static int
null_ncreate(struct vop_ncreate_args *ap)
{
- ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ap->a_ncp->nc_shadowed;
return vop_ncreate_ap(ap);
}
@@ -141,7 +218,8 @@ static int
static int
null_nmkdir(struct vop_nmkdir_args *ap)
{
- ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ap->a_ncp->nc_shadowed;
return vop_nmkdir_ap(ap);
}
@@ -149,7 +227,8 @@ static int
static int
null_nmknod(struct vop_nmknod_args *ap)
{
- ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ap->a_ncp->nc_shadowed;
return vop_nmknod_ap(ap);
}
@@ -157,7 +236,8 @@ static int
static int
null_nlink(struct vop_nlink_args *ap)
{
- ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ap->a_ncp->nc_shadowed;
return vop_nlink_ap(ap);
}
@@ -165,7 +245,8 @@ static int
static int
null_nsymlink(struct vop_nsymlink_args *ap)
{
- ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ap->a_ncp->nc_shadowed;
return vop_nsymlink_ap(ap);
}
@@ -173,7 +254,8 @@ static int
static int
null_nwhiteout(struct vop_nwhiteout_args *ap)
{
- ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ap->a_ncp->nc_shadowed;
return vop_nwhiteout_ap(ap);
}
@@ -181,7 +263,8 @@ static int
static int
null_nremove(struct vop_nremove_args *ap)
{
- ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ap->a_ncp->nc_shadowed;
return vop_nremove_ap(ap);
}
@@ -189,7 +272,8 @@ static int
static int
null_nrmdir(struct vop_nrmdir_args *ap)
{
- ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ap->a_ncp->nc_shadowed;
return vop_nrmdir_ap(ap);
}
@@ -197,15 +281,32 @@ static int
static int
null_nrename(struct vop_nrename_args *ap)
{
+ struct namecache *fncp = ap->a_fncp;
+ struct namecache *tncp = ap->a_tncp;
struct mount *lmp;
-
- lmp = MOUNTTONULLMOUNT(ap->a_fncp->nc_mount)->nullm_vfs;
- if (lmp != MOUNTTONULLMOUNT(ap->a_tncp->nc_mount)->nullm_vfs)
- return (EINVAL);
-
- ap->a_head.a_ops = lmp->mnt_vn_norm_ops;
-
- return vop_nrename_ap(ap);
+ int error;
+
+ lmp = nullfs_lowermount_l(fncp);
+ if (lmp != nullfs_lowermount_l(tncp))
+ return (EXDEV);
+
+ ap->a_head.a_ops = lmp->mnt_vn_use_ops;
+
+ /*
+ * Parent-child relations are not subject to shadowing, therefore
+ * we can't just dumbly delegate; we have some work in both layers.
+ * However, shadowing still saves us from locking or dealing with
+ * vnodes.
+ */
+ ap->a_fncp = fncp->nc_shadowed;
+ ap->a_tncp = tncp->nc_shadowed;
+
+ if ((error = vop_nrename_ap(ap)))
+ return (error);
+
+ cache_rename(fncp, tncp);
+
+ return (error);
}
/*
@@ -224,4 +325,3 @@ struct vnodeopv_entry_desc null_vnodeop_
{ &vop_nrename_desc, (vnodeopv_entry_t) null_nrename },
{ NULL, NULL }
};
-