Patch for inode SLIST conversion

Michael Neumann mneumann at ntecs.de
Mon Jan 21 07:03:49 PST 2008


Matthew Dillon wrote:
:If I understand serialization tokens correctly, they lead in case of the 
:two functions above to mutual exclusive execution, because none of them 
:voluntarily blocks or calls a blocking method. Is this assumption correct?

    Correct.

:                 /*
:                  * We must check to see if the inode has been ripped
:                  * out from under us after blocking.
:                  */
:                 INOFIND(ip, dev, inum);
:                 if (ip == NULL || ITOV(ip) != vp) {
:        			vput(vp);
:..
:
:See the comment on "ripped out from under us". This can only happen 
:because vget might block, right?

    Correct, though once you start calling random procedures which are
    not specifically documented as being non-blocking, it can get dangerous
    because the assumption might not hold if the code is changed in the
    future.
    But here... yes, correct.

:Now, ufs_ihashget doesn't modify the hashtable at all. So why not store 
:a "transaction id" in each hash table bucket, which gets increased by 
:ufs_ihashins and ufs_ihashrem. Then in ufs_ihashget I read in this id 
:*before* calling vget and check afterwards if it has changed. If it is 
:unchanged, I can omit a second pass over the linked list.
:
:It's actually quite easy to implement and if all my assumptions apply, 
:the correct way of doing it. Please correct me if I'm wrong.
:
:
:Regards,
:
:   Michael

    Ok, three things.  First, the transaction id will be fairly granular,
    meaning that if it does not match you have to loop.  So the code
    is going to wind up being very slightly more complex than it is now.
    Second, the transaction id idea has been used in BSD code before,
    particularly with regard to dealing with vm_map's.  It will be
    higher-performing, but not by a lot, and it has the problem of 
    obfuscating the code a bit (meaning you have to carefully document
    the algorithm in comments) and it is also somewhat algorithmically
    fragile because there is no direct check against duplicate nodes.
In case the token has been used by another thread, the check for 
duplicate nodes will be performed anyway. But only in that case!
So I don't think that duplicate nodes can happen at all.

    Third, the hash table is sized such that it has or should have very
    short chain lengths.  The relookup will all be in the cpu's L1 cache
    and I doubt you'd notice any improvement in performance.
That's true. I couldn't measure any performance increase/decrease.

    I would personally like to keep the code as is and not make any major
    changes to it, simply because we know it works and it is extremely
    fragile.  If you really want to redo the code, you can, but I would
    like to wait until after the 2.0 release to commit it.
I'm just learning by doing, so it's not important to put those changes
back. Nevertheless IMHO the slist conversion changes (without the 
staleness checks) make the code slightly more readable.

Regards,

  Michael
Index: ufs_ihash.c
===================================================================
RCS file: /home/dcvs/src/sys/vfs/ufs/ufs_ihash.c,v
retrieving revision 1.20
diff -u -r1.20 ufs_ihash.c
--- ufs_ihash.c	14 Oct 2006 16:26:40 -0000	1.20
+++ ufs_ihash.c	20 Jan 2008 02:03:22 -0000
@@ -51,13 +51,50 @@
 /*
  * Structures associated with inode cacheing.
  */
-static struct inode **ihashtbl;
-static u_long	ihash;		/* size of hash table - 1 */
+SLIST_HEAD(ihashtbl_bucket, inode);
+static struct ihashtbl_bucket *ihashtbl;
+static u_long ihash;		/* size of hash table - 1 */
 static struct lwkt_token ufs_ihash_token;
 
+/*
+ * This counter is used to detect changes to the hashtable during the execution
+ * of ufs_ihashget in case it is blocked and loses its serialization token.
+ * 
+ * The counter is incremented by ufs_ihashins and ufs_ihashrem, the only two
+ * functions that actually modify the hashtable.
+ */
+static int32_t change_count;
+
+/*
+ * Keeps the previous element around while iterating through the list.
+ */
+#define SLIST_FOREACH_WITH_PREV(var, prev, head, field)			\
+	for (((prev) = NULL, (var) = SLIST_FIRST((head)));		\
+	    (var);							\
+	    ((prev) = (var), (var) = SLIST_NEXT((var), field)))
+
+/*
+ * Return pointer to inode hashtbl bucket for (device, inum).
+ */
 #define	INOHASH(device, inum)	(&ihashtbl[(minor(device) + (inum)) & ihash])
 
 /*
+ * Test for equality of ino and (device, inum).
+ */
+#define INOEQL(ino, device, inum)					\
+	((inum) == (ino)->i_number && (device) == (ino)->i_dev)
+
+/*
+ * Find the inode that equals (device, inum) in the hashtbl. 
+ * Returns in "var" the pointer to the inode or NULL.
+ */
+#define INOFIND(var, device, inum)					\
+	SLIST_FOREACH(var, INOHASH(device, inum), i_next) {	\
+		if (INOEQL(var, device, inum))				\
+			break;						\
+	}
+
+/*
  * Initialize inode hash table.
  */
 void
@@ -66,8 +103,10 @@
 	ihash = 16;
 	while (ihash < desiredvnodes)
 		ihash <<= 1;
-	ihashtbl = kmalloc(sizeof(void *) * ihash, M_UFSIHASH, M_WAITOK|M_ZERO);
+	ihashtbl = kmalloc(sizeof(struct ihashtbl_bucket) * ihash, M_UFSIHASH,
+			M_WAITOK|M_ZERO);
 	--ihash;
+	change_count = 0;
 	lwkt_token_init(&ufs_ihash_token);
 }
 
@@ -94,10 +133,7 @@
 	lwkt_tokref ilock;
 
 	lwkt_gettoken(&ilock, &ufs_ihash_token);
-	for (ip = *INOHASH(dev, inum); ip; ip = ip->i_next) {
-		if (inum == ip->i_number && dev == ip->i_dev)
-			break;
-	}
+	INOFIND(ip, dev, inum);
 	lwkt_reltoken(&ilock);
 	if (ip)
 		return (ITOV(ip));
@@ -119,12 +155,22 @@
 	lwkt_tokref ilock;
 	struct inode *ip;
 	struct vnode *vp;
+	int32_t old_change_count; 
 
 	lwkt_gettoken(&ilock, &ufs_ihash_token);
+
 loop:
-	for (ip = *INOHASH(dev, inum); ip; ip = ip->i_next) {
-		if (inum != ip->i_number || dev != ip->i_dev)
-			continue;
+	INOFIND(ip, dev, inum);
+	vp = NULL;
+	if (ip)
+	{
+		/*
+		 * save old change_count before invoking a potentially 
+		 * blocking operation (vget) to detect modifications 
+		 * when the blocking operation returns. 
+		 */
+		old_change_count = change_count;
+
 		vp = ITOV(ip);
 		if (vget(vp, LK_EXCLUSIVE))
 			goto loop;
@@ -132,19 +178,17 @@
 		 * We must check to see if the inode has been ripped
 		 * out from under us after blocking.
 		 */
-		for (ip = *INOHASH(dev, inum); ip; ip = ip->i_next) {
-			if (inum == ip->i_number && dev == ip->i_dev)
-				break;
-		}
-		if (ip == NULL || ITOV(ip) != vp) {
-			vput(vp);
-			goto loop;
+		if (old_change_count != change_count) {
+			INOFIND(ip, dev, inum);
+			if (ip == NULL || ITOV(ip) != vp) {
+				vput(vp);
+				goto loop;
+			}
 		}
-		lwkt_reltoken(&ilock);
-		return (vp);
 	}
+
 	lwkt_reltoken(&ilock);
-	return (NULL);
+	return (vp);
 }
 
 /*
@@ -159,10 +203,7 @@
 	struct inode *ip;
 
 	lwkt_gettoken(&ilock, &ufs_ihash_token);
-	for (ip = *INOHASH(dev, inum); ip; ip = ip->i_next) {
-		if (inum == ip->i_number && dev == ip->i_dev)
-			break;
-	}
+	INOFIND(ip, dev, inum)
 	lwkt_reltoken(&ilock);
 	return(ip ? 1 : 0);
 }
@@ -173,22 +214,26 @@
 int
 ufs_ihashins(struct inode *ip)
 {
-	struct inode **ipp;
 	struct inode *iq;
+	struct inode *prev;
+	struct ihashtbl_bucket *head; 
 	lwkt_tokref ilock;
 
 	KKASSERT((ip->i_flag & IN_HASHED) == 0);
 	lwkt_gettoken(&ilock, &ufs_ihash_token);
-	ipp = INOHASH(ip->i_dev, ip->i_number);
-	while ((iq = *ipp) != NULL) {
-		if (ip->i_dev == iq->i_dev && ip->i_number == iq->i_number) {
+
+	++change_count;	/* mark hashtable as modified */
+	head = INOHASH(ip->i_dev, ip->i_number);
+
+	SLIST_FOREACH_WITH_PREV(iq, prev, head, i_next) {
+		if (INOEQL(ip, iq->i_dev, iq->i_number)) {
 			lwkt_reltoken(&ilock);
 			return(EBUSY);
 		}
-		ipp = &iq->i_next;
 	}
-	ip->i_next = NULL;
-	*ipp = ip;
+	if (prev) SLIST_NEXT(prev, i_next) = ip; 
+	else      SLIST_FIRST(head) = ip; 
+	SLIST_NEXT(ip, i_next) = NULL;
 	ip->i_flag |= IN_HASHED;
 	lwkt_reltoken(&ilock);
 	return(0);
@@ -201,22 +246,14 @@
 ufs_ihashrem(struct inode *ip)
 {
 	lwkt_tokref ilock;
-	struct inode **ipp;
-	struct inode *iq;
 
 	lwkt_gettoken(&ilock, &ufs_ihash_token);
 	if (ip->i_flag & IN_HASHED) {
-		ipp = INOHASH(ip->i_dev, ip->i_number);
-		while ((iq = *ipp) != NULL) {
-			if (ip == iq)
-				break;
-			ipp = &iq->i_next;
-		}
-		KKASSERT(ip == iq);
-		*ipp = ip->i_next;
-		ip->i_next = NULL;
+		++change_count;	/* mark hashtable as modified */
+		SLIST_REMOVE(INOHASH(ip->i_dev, ip->i_number), ip,
+			inode, i_next);
+		SLIST_NEXT(ip, i_next) = NULL;
 		ip->i_flag &= ~IN_HASHED;
 	}
 	lwkt_reltoken(&ilock);
 }
-




More information about the Kernel mailing list