kern_objcache.c patch tokens -> spinlocks

Matthew Dillon dillon at apollo.backplane.com
Fri Nov 17 12:02:22 PST 2006


    I've been meaning to do this for a while.  In order to be able to safely
    use the object cache from interrupts we have to use spin locks instead
    of tokens to access the backing depot.  And, as I just posted in another
    missif, spinlocks really ough to be what we use when locking up short
    code sections.  The best-case code path is still lock-free, of course.

    Please review and test.  This will go in Monday.

					-Matt
					Matthew Dillon 
					<dillon at xxxxxxxxxxxxx>


Index: kern/kern_objcache.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_objcache.c,v
retrieving revision 1.11
diff -u -r1.11 kern_objcache.c
--- kern/kern_objcache.c	5 Sep 2006 03:48:12 -0000	1.11
+++ kern/kern_objcache.c	17 Nov 2006 19:31:37 -0000
@@ -40,8 +40,10 @@
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/objcache.h>
+#include <sys/spinlock.h>
 #include <sys/thread.h>
 #include <sys/thread2.h>
+#include <sys/spinlock2.h>
 
 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
@@ -51,6 +53,7 @@
 struct magazine {
 	int			 rounds;
 	int			 capacity;
+	int			 cleaning;
 	SLIST_ENTRY(magazine)	 nextmagazine;
 	void			*objects[];
 };
@@ -59,7 +62,8 @@
 
 /*
  * per-cluster cache of magazines
- * All fields in this structure are protected by the token.
+ *
+ * All fields in this structure are protected by the spinlock.
  */
 struct magazinedepot {
 	/*
@@ -72,7 +76,7 @@
 	int			magcapacity;
 
 	/* protect this structure */
-	struct lwkt_token	token;
+	struct spinlock		spin;
 
 	/* magazines not yet allocated towards limit */
 	int			unallocated_objects;
@@ -134,7 +138,7 @@
 	struct percpu_objcache	cache_percpu[];		/* per-cpu caches */
 };
 
-static struct lwkt_token objcachelist_token;
+static struct spinlock objcachelist_spin;
 static SLIST_HEAD(objcachelist, objcache) allobjcaches;
 
 static struct magazine *
@@ -146,6 +150,7 @@
 			M_OBJMAG, M_INTWAIT | M_ZERO);
 	mag->capacity = capacity;
 	mag->rounds = 0;
+	mag->cleaning = 0;
 	return (mag);
 }
 
@@ -160,7 +165,6 @@
 {
 	struct objcache *oc;
 	struct magazinedepot *depot;
-	lwkt_tokref olock;
 	int cpuid;
 
 	/* allocate object cache structure */
@@ -176,7 +180,7 @@
 	/* initialize depots */
 	depot = &oc->depot[0];
 
-	lwkt_token_init(&depot->token);
+	spin_init(&depot->spin);
 	SLIST_INIT(&depot->fullmagazines);
 	SLIST_INIT(&depot->emptymagazines);
 
@@ -204,9 +208,9 @@
 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
 	}
-	lwkt_gettoken(&olock, &objcachelist_token);
+	spin_lock_wr(&objcachelist_spin);
 	SLIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
-	lwkt_reltoken(&olock);
+	spin_unlock_wr(&objcachelist_spin);
 
 	return (oc);
 }
@@ -247,7 +251,6 @@
 	struct magazine *emptymag;
 	void *obj;
 	struct magazinedepot *depot;
-	lwkt_tokref ilock;
 
 	crit_enter();
 	++cpucache->gets_cumulative;
@@ -267,6 +270,8 @@
 
 	/* Previous magazine has an object. */
 	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
+		KKASSERT(cpucache->previous_magazine->cleaning +
+			 cpucache->loaded_magazine->cleaning == 0);
 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
 		loadedmag = cpucache->loaded_magazine;
 		obj = loadedmag->objects[--loadedmag->rounds];
@@ -278,20 +283,19 @@
 	 * Both magazines empty.  Get a full magazine from the depot and
 	 * move one of the empty ones to the depot.
 	 *
-	 * Obtain the depot token.
+	 * Obtain the depot spinlock.
 	 */
 	depot = &oc->depot[myclusterid];
-	lwkt_gettoken(&ilock, &depot->token);
+	spin_lock_wr(&depot->spin);
 
 	/*
-	 * We might have blocked obtaining the token, we must recheck
-	 * the cpucache before potentially falling through to the blocking
-	 * code or we might deadlock the tsleep() on a low-memory machine.
+	 * Recheck the cpucache after obtaining the depot spinlock.  This
+	 * shouldn't be necessary now but don't take any chances.
 	 */
 	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
 	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
 	) {
-		lwkt_reltoken(&ilock);
+		spin_unlock_wr(&depot->spin);
 		goto retry;
 	}
 
@@ -308,7 +312,7 @@
 		KKASSERT(MAGAZINE_EMPTY(emptymag));
 		SLIST_INSERT_HEAD(&depot->emptymagazines,
 				  emptymag, nextmagazine);
-		lwkt_reltoken(&ilock);
+		spin_unlock_wr(&depot->spin);
 		goto retry;
 	}
 
@@ -322,7 +326,7 @@
 	 */
 	if (depot->unallocated_objects) {
 		--depot->unallocated_objects;
-		lwkt_reltoken(&ilock);
+		spin_unlock_wr(&depot->spin);
 		crit_exit();
 
 		obj = oc->alloc(oc->allocator_args, ocflags);
@@ -330,11 +334,11 @@
 			if (oc->ctor(obj, oc->private, ocflags))
 				return (obj);
 			oc->free(obj, oc->allocator_args);
-			lwkt_gettoken(&ilock, &depot->token);
+			spin_lock_wr(&depot->spin);
 			++depot->unallocated_objects;
+			spin_unlock_wr(&depot->spin);
 			if (depot->waiting)
 				wakeup(depot);
-			lwkt_reltoken(&ilock);
 			obj = NULL;
 		}
 		if (obj == NULL) {
@@ -356,10 +360,10 @@
 	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
 		++cpucache->waiting;
 		++depot->waiting;
-		tsleep(depot, 0, "objcache_get", 0);
+		msleep(depot, &depot->spin, 0, "objcache_get", 0);
 		--cpucache->waiting;
 		--depot->waiting;
-		lwkt_reltoken(&ilock);
+		spin_unlock_wr(&depot->spin);
 		goto retry;
 	}
 
@@ -369,7 +373,7 @@
 	++cpucache->gets_null;
 	--cpucache->gets_cumulative;
 	crit_exit();
-	lwkt_reltoken(&ilock);
+	spin_unlock_wr(&depot->spin);
 	return (NULL);
 }
 
@@ -417,7 +421,6 @@
 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
 	struct magazine *loadedmag;
 	struct magazinedepot *depot;
-	lwkt_tokref ilock;
 
 	crit_enter();
 	++cpucache->puts_cumulative;
@@ -450,6 +453,8 @@
 	 * Current magazine full, but previous magazine has room.  XXX
 	 */
 	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
+		KKASSERT(cpucache->previous_magazine->cleaning +
+			 cpucache->loaded_magazine->cleaning == 0);
 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
 		loadedmag = cpucache->loaded_magazine;
 		loadedmag->objects[loadedmag->rounds++] = obj;
@@ -463,19 +468,21 @@
 	 * Both magazines full.  Get an empty magazine from the depot and
 	 * move a full loaded magazine to the depot.  Even though the
 	 * magazine may wind up with space available after we block on
-	 * the token, we still cycle it through to avoid the non-optimal
+	 * the spinlock, we still cycle it through to avoid the non-optimal
 	 * corner-case.
 	 *
-	 * Obtain the depot token.
+	 * Obtain the depot spinlock.
 	 */
 	depot = &oc->depot[myclusterid];
-	lwkt_gettoken(&ilock, &depot->token);
+	spin_lock_wr(&depot->spin);
 
 	/*
 	 * If an empty magazine is available in the depot, cycle it
 	 * through and retry.
 	 */
 	if (!SLIST_EMPTY(&depot->emptymagazines)) {
+		KKASSERT(cpucache->previous_magazine->cleaning +
+			 cpucache->loaded_magazine->cleaning == 0);
 		loadedmag = cpucache->previous_magazine;
 		cpucache->previous_magazine = cpucache->loaded_magazine;
 		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
@@ -488,13 +495,14 @@
 		if (MAGAZINE_EMPTY(loadedmag)) {
 			SLIST_INSERT_HEAD(&depot->emptymagazines,
 					  loadedmag, nextmagazine);
+			spin_unlock_wr(&depot->spin);
 		} else {
 			SLIST_INSERT_HEAD(&depot->fullmagazines,
 					  loadedmag, nextmagazine);
+			spin_unlock_wr(&depot->spin);
 			if (depot->waiting)
 				wakeup(depot);
 		}
-		lwkt_reltoken(&ilock);
 		goto retry;
 	}
 
@@ -504,9 +512,9 @@
 	 * to allocate a mag, just free the object.
 	 */
 	++depot->unallocated_objects;
+	spin_unlock_wr(&depot->spin);
 	if (depot->waiting)
 		wakeup(depot);
-	lwkt_reltoken(&ilock);
 	crit_exit();
 	oc->dtor(obj, oc->private);
 	oc->free(obj, oc->allocator_args);
@@ -521,14 +529,13 @@
 objcache_dtor(struct objcache *oc, void *obj)
 {
 	struct magazinedepot *depot;
-	lwkt_tokref ilock;
 
 	depot = &oc->depot[myclusterid];
-	lwkt_gettoken(&ilock, &depot->token);
+	spin_lock_wr(&depot->spin);
 	++depot->unallocated_objects;
+	spin_unlock_wr(&depot->spin);
 	if (depot->waiting)
 		wakeup(depot);
-	lwkt_reltoken(&ilock);
 	oc->dtor(obj, oc->private);
 	oc->free(obj, oc->allocator_args);
 }
@@ -549,64 +556,101 @@
 }
 
 /*
- * De-construct and de-allocate objects in a magazine.
- * Returns the number of objects freed.
- * Does not de-allocate the magazine itself.
+ * Deallocate all objects in a magazine and free the magazine if requested.
+ * The magazine must already be disassociated from the depot.
+ *
+ * Must be called with a critical section held when called with a per-cpu
+ * magazine.  The magazine may be indirectly modified during the loop.
+ *
+ * The number of objects freed is returned.
  */
 static int
-mag_purge(struct objcache *oc, struct magazine *mag)
+mag_purge(struct objcache *oc, struct magazine *mag, int freeit)
 {
-	int ndeleted;
+	int count;
 	void *obj;
 
-	ndeleted = 0;
-	crit_enter();
+	count = 0;
+	++mag->cleaning;
 	while (mag->rounds) {
 		obj = mag->objects[--mag->rounds];
-		crit_exit();
-		oc->dtor(obj, oc->private);
-		oc->free(obj, oc->allocator_args);
-		++ndeleted;
-		crit_enter();
+		oc->dtor(obj, oc->private);		/* MAY BLOCK */
+		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
+		++count;
+
+		/*
+		 * Cycle for interrupts
+		 */
+		if ((count & 15) == 0) {
+			crit_exit();
+			crit_enter();
+		}
 	}
-	crit_exit();
-	return(ndeleted);
+	--mag->cleaning;
+	if (freeit)
+		kfree(mag, M_OBJMAG);
+	return(count);
 }
 
 /*
- * De-allocate all magazines in a magazine list.
- * Returns number of objects de-allocated.
+ * Disassociate zero or more magazines from a magazine list associated with
+ * the depot, update the depot, and move the magazines to a temporary
+ * list.
+ *
+ * The caller must check the depot for waiters and wake it up, typically
+ * after disposing of the magazines this function loads onto the temporary
+ * list.
+ */
+static void
+maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
+		     struct magazinelist *tmplist, boolean_t purgeall)
+{
+	struct magazine *mag;
+
+	while ((mag = SLIST_FIRST(maglist)) != NULL) {
+		SLIST_REMOVE_HEAD(maglist, nextmagazine);
+		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
+		depot->unallocated_objects += mag->rounds;
+	}
+}
+			
+/*
+ * Deallocate all magazines and their contents from the passed temporary
+ * list.  The magazines have already been accounted for by their depots.
+ *
+ * The total number of rounds freed is returned.  This number is typically
+ * only used to determine whether a wakeup on the depot is needed or not.
  */
 static int
-maglist_purge(struct objcache *oc, struct magazinelist *maglist,
-	      boolean_t purgeall)
+maglist_purge(struct objcache *oc, struct magazinelist *maglist)
 {
 	struct magazine *mag;
-	int ndeleted = 0;
+	int count = 0;
 
-	/* can't use SLIST_FOREACH because blocking releases the depot token */
-	while ((mag = SLIST_FIRST(maglist))) {
+	/*
+	 * can't use SLIST_FOREACH because blocking releases the depot
+	 * spinlock 
+	 */
+	while ((mag = SLIST_FIRST(maglist)) != NULL) {
 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
-		ndeleted += mag_purge(oc, mag);		/* could block! */
-		kfree(mag, M_OBJMAG);			/* could block! */
-		if (!purgeall && ndeleted > 0)
-			break;
+		count += mag_purge(oc, mag, TRUE);
 	}
-	return (ndeleted);
+	return(count);
 }
 
 /*
  * De-allocates all magazines on the full and empty magazine lists.
+ *
+ * Because this routine is called with a spinlock held, the magazines
+ * can only be disassociated and moved to a temporary list, not freed.
+ *
+ * The caller is responsible for freeing the magazines.
  */
 static void
-depot_purge(struct magazinedepot *depot, struct objcache *oc)
+depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
 {
-	depot->unallocated_objects += 
-		maglist_purge(oc, &depot->fullmagazines, TRUE);
-	depot->unallocated_objects +=
-		maglist_purge(oc, &depot->emptymagazines, TRUE);
-	if (depot->unallocated_objects && depot->waiting)
-		wakeup(depot);
+	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
+	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
 }
 
 #ifdef notneeded
@@ -615,17 +659,27 @@
 {
 	struct percpu_objcache *cache_percpu = &oc->cache_percpu[myclusterid];
 	struct magazinedepot *depot = &oc->depot[myclusterid];
+	struct magazinelist tmplist;
+	int count;
 
-	mag_purge(oc, cache_percpu->loaded_magazine);
-	mag_purge(oc, cache_percpu->previous_magazine);
+	SLIST_INIT(&tmplist);
+	crit_enter();
+	count = mag_purge(oc, cache_percpu->loaded_magazine, FALSE);
+	count += mag_purge(oc, cache_percpu->previous_magazine, FALSE);
+	crit_exit();
 
-	/* XXX need depot token */
-	depot_purge(depot, oc);
+	spin_lock_wr(&depot->spin);
+	depot->unallocated_objects += count;
+	depot_disassociate(depot, &tmplist);
+	spin_unlock_wr(&depot->spin);
+	count += maglist_purge(oc, &tmplist);
+	if (count && depot->waiting)
+		wakeup(depot);
 }
 #endif
 
 /*
- * Try to free up some memory.  Return as soon as some free memory found.
+ * Try to free up some memory.  Return as soon as some free memory is found.
  * For each object cache on the reclaim list, first try the current per-cpu
  * cache, then the full magazine depot.
  */
@@ -635,8 +689,10 @@
 	struct objcache *oc;
 	struct percpu_objcache *cpucache;
 	struct magazinedepot *depot;
-	lwkt_tokref ilock;
-	int i, ndel;
+	struct magazinelist tmplist;
+	int i, count;
+
+	SLIST_INIT(&tmplist);
 
 	for (i = 0; i < nlist; i++) {
 		oc = oclist[i];
@@ -644,53 +700,61 @@
 		depot = &oc->depot[myclusterid];
 
 		crit_enter();
-		if ((ndel = mag_purge(oc, cpucache->loaded_magazine)) > 0 ||
-		    (ndel = mag_purge(oc, cpucache->previous_magazine)) > 0) {
-			crit_exit();
-			lwkt_gettoken(&ilock, &depot->token);
-			depot->unallocated_objects += ndel;
-			if (depot->unallocated_objects && depot->waiting)
+		count = mag_purge(oc, cpucache->loaded_magazine, FALSE);
+		if (count == 0)
+			count += mag_purge(oc, cpucache->previous_magazine, FALSE);
+		crit_exit();
+		if (count > 0) {
+			spin_lock_wr(&depot->spin);
+			depot->unallocated_objects += count;
+			spin_unlock_wr(&depot->spin);
+			if (depot->waiting)
 				wakeup(depot);
-			lwkt_reltoken(&ilock);
 			return (TRUE);
 		}
 		crit_exit();
-		lwkt_gettoken(&ilock, &depot->token);
-		if ((ndel =
-		     maglist_purge(oc, &depot->fullmagazines, FALSE)) > 0) {
-			depot->unallocated_objects += ndel;
-			if (depot->unallocated_objects && depot->waiting)
+		spin_lock_wr(&depot->spin);
+		maglist_disassociate(depot, &depot->fullmagazines,
+				     &tmplist, FALSE);
+		spin_unlock_wr(&depot->spin);
+		count = maglist_purge(oc, &tmplist);
+		if (count > 0) {
+			if (depot->waiting)
 				wakeup(depot);
-			lwkt_reltoken(&ilock);
 			return (TRUE);
 		}
-		lwkt_reltoken(&ilock);
 	}
 	return (FALSE);
 }
 
 /*
  * Destroy an object cache.  Must have no existing references.
- * XXX Not clear this is a useful API function.
  */
 void
 objcache_destroy(struct objcache *oc)
 {
 	struct percpu_objcache *cache_percpu;
+	struct magazinedepot *depot;
 	int clusterid, cpuid;
+	struct magazinelist tmplist;
 
-	/* XXX need depot token? */
-	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++)
-		depot_purge(&oc->depot[clusterid], oc);
+	SLIST_INIT(&tmplist);
+	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
+		depot = &oc->depot[clusterid];
+		spin_lock_wr(&depot->spin);
+		depot_disassociate(depot, &tmplist);
+		spin_unlock_wr(&depot->spin);
+	}
+	maglist_purge(oc, &tmplist);
 
 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
 		cache_percpu = &oc->cache_percpu[cpuid];
 
-		mag_purge(oc, cache_percpu->loaded_magazine);
-		kfree(cache_percpu->loaded_magazine, M_OBJMAG);
-
-		mag_purge(oc, cache_percpu->previous_magazine);
-		kfree(cache_percpu->previous_magazine, M_OBJMAG);
+		mag_purge(oc, cache_percpu->loaded_magazine, TRUE);
+		mag_purge(oc, cache_percpu->previous_magazine, TRUE);
+		cache_percpu->loaded_magazine = NULL;
+		cache_percpu->previous_magazine = NULL;
+		/* don't bother adjusting depot->unallocated_objects */
 	}
 
 	kfree(oc->name, M_TEMP);
@@ -709,24 +773,34 @@
 	char *p = base;
 	char *end = (char *)base + (nelts * size);
 	struct magazinedepot *depot = &oc->depot[myclusterid];
-	lwkt_tokref ilock;
-	struct magazine sentinelfullmag = { 0, 0 };
-	struct magazine *emptymag = &sentinelfullmag;
+	struct magazine *emptymag = mag_alloc(depot->magcapcity);
 
-	lwkt_gettoken(&ilock, &depot->token);
 	while (p < end) {
+		emptymag->objects[emptymag->rounds++] = p;
 		if (MAGAZINE_FULL(emptymag)) {
-			emptymag = mag_alloc(depot->magcapacity);
+			spin_lock_wr(&depot->spin);
 			SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
 					  nextmagazine);
+			depot->unallocated_objects += emptymag->rounds;
+			spin_unlock_wr(&depot->spin);
+			if (depot->waiting)
+				wakeup(depot);
+			emptymag = mag_alloc(depot->magcapacity);
 		}
-		emptymag->objects[emptymag->rounds++] = p;
 		p += size;
 	}
-	depot->unallocated_objects += nelts;
-	if (depot->unallocated_objects && depot->waiting)
-		wakeup(depot);
-	lwkt_reltoken(&ilock);
+	if (MAGAZINE_EMPTY(emptymag)) {
+		mag_purge(oc, emptymag, TRUE);
+	} else {
+		spin_lock_wr(&depot->spin);
+		SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
+				  nextmagazine);
+		depot->unallocated_objects += emptymag->rounds;
+		spin_unlock_wr(&depot->spin);
+		if (depot->waiting)
+			wakeup(depot);
+		emptymag = mag_alloc(depot->magcapacity);
+	}
 }
 #endif
 
@@ -749,17 +823,22 @@
 {
 	struct objcache *oc;
 	struct magazinedepot *depot;
-	lwkt_tokref olock, dlock;
+	struct magazinelist tmplist;
+
+	XXX we need to detect when an objcache is destroyed out from under
+	    us XXX
+
+	SLIST_INIT(&tmplist);
 
-	lwkt_gettoken(&olock, &objcachelist_token);
+	spin_lock_wr(&objcachelist_spin);
 	SLIST_FOREACH(oc, &allobjcaches, oc_next) {
 		depot = &oc->depot[myclusterid];
 		if (depot->magcapacity < MAXMAGSIZE) {
 			if (depot->contested > objcache_contention_rate) {
-				lwkt_gettoken(&dlock, &depot->token);
-				depot_purge(depot, oc);
+				spin_lock_wr(&depot->spin);
+				depot_disassociate(depot, &tmplist);
 				depot->magcapacity *= 2;
-				lwkt_reltoken(&dlock);
+				spin_unlock_wr(&depot->spin);
 				printf("objcache_timer: increasing cache %s"
 				       " magsize to %d, contested %d times\n",
 				    oc->name, depot->magcapacity,
@@ -767,8 +846,12 @@
 			}
 			depot->contested = 0;
 		}
+		spin_unlock_wr(&objcachelist_spin);
+		if (maglist_purge(oc, &tmplist) > 0 && depot->waiting)
+			wakeup(depot);
+		spin_lock_wr(&objcachelist_spin);
 	}
-	lwkt_reltoken(&olock);
+	spin_unlock_wr(&objcachelist_spin);
 
 	callout_reset(&objcache_callout, objcache_rebalance_period,
 		      objcache_timer, NULL);
@@ -779,7 +862,7 @@
 static void
 objcache_init(void)
 {
-	lwkt_token_init(&objcachelist_token);
+	spin_init(&objcachelist_spin);
 #if 0
 	callout_init(&objcache_callout);
 	objcache_rebalance_period = 60 * hz;





More information about the Kernel mailing list