VM idle page zeroing

Venkatesh Srinivas me at endeavour.zapto.org
Mon May 17 08:06:55 PDT 2010


Hi,

Attached is a patch that changes a few things:

1) Removes the CVS ID; that was vestigial, I forgot I'd included that.

2) Rename idlezero_count back to cnt_prezero, because that is the
original name (in FreeBSD and in DragonFly). I don't feel strongly about
this at all - is it reasonable to try to match up sysctl names, or is it
not worth it?

3) Change vm_page_zero_check() to return the reason that the check failed.

4) In vm_pagezero(), switch tsleep to sleep for a variable amount of time; 
by default it is still hz/10, now called DEFAULT_SLEEP_TIME. If 
vm_page_zero_check() failed because zeroing was either disabled or because
we hit the ZIDLE_HI limit, mark ourselves to sleep for a long time 
(hz*10). If we are lower than the upper limit but higher than the low
limit, or we are below the low limit, restore the default sleep time.

5) Set zero_state when we hit the high mark and clear it when we fall
below the low mark; this restores the hysteresis formerly present.

Other thoughts - before we enter the main loop, perhaps we should sleep
without a timeout if idlezero_enable is 0? Then the enable/disable sysctl
could be a SYSCTL_PROC which wakes the thread up; this would minimize the
(admittedly tiny) load on systems on which it is disabled.

Thoughts on any of this?

Thanks,
-- vs
--- /scratch/dragonfly/sys/vm/vm_zeroidle.c	2010-05-16 14:15:40.000000000 -0400
+++ vm_zeroidle.c	2010-05-17 04:58:22.000000000 -0400
@@ -35,8 +35,6 @@
  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
  *	Utah $Hdr: vm_machdep.c	1.16.1.1 89/06/23$
  * from FreeBSD: .../i386/vm_machdep.c,v 1.165 2001/07/04 23:27:04 dillon
- *
- * $Id: vm_zeroidle.c,v 1.3 2010/05/12 04:50:45 sv5679 Exp $
  */
 
 #include <sys/param.h>
@@ -67,7 +65,6 @@
 /* Maximum number of pages per second to zero */
 #define NPAGES_RUN	(20000)
 
-
 static int idlezero_enable = 0;
 TUNABLE_INT("vm.idlezero_enable", &idlezero_enable);
 SYSCTL_INT(_vm, OID_AUTO, idlezero_enable, CTLFLAG_RW, &idlezero_enable, 0,
@@ -79,8 +76,8 @@
 SYSCTL_INT(_vm, OID_AUTO, idlezero_nocache, CTLFLAG_RW, &idlezero_nocache, 0,
 	   "Maximum pages per second to zero");
 
-static int idlezero_count = 0;
-SYSCTL_INT(_vm, OID_AUTO, idlezero_count, CTLFLAG_RD, &idlezero_count, 0,
+static int cnt_prezero = 0;
+SYSCTL_INT(_vm, OID_AUTO, cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0,
 	   "The number of physical pages prezeroed at idle time");
 
 enum zeroidle_state {
@@ -90,7 +87,18 @@
 	STATE_RELEASE_PAGE
 };
 
+enum zero_check_state {
+	Z_DISABLED,
+	Z_LOW_LIM,
+	Z_HIGH_LIM,
+	Z_ZERO
+};
+
+#define DEFAULT_SLEEP_TIME	(hz / 10)
+#define LONG_SLEEP_TIME	(hz * 10)
+
 static int zero_state;
+static int sleep_time;
 
 /*
  * Attempt to maintain approximately 1/2 of our free pages in a
@@ -100,16 +108,16 @@
  * fast sleeps. We also do not want to be continuously zeroing
  * pages because doing so may flush our L1 and L2 caches too much.
  */
-static int
+static enum zero_check_state
 vm_page_zero_check(void)
 {
 	if (idlezero_enable == 0)
-		return (0);
+		return (Z_DISABLED);
 	if (zero_state && vm_page_zero_count >= ZIDLE_LO(vmstats.v_free_count))
-		return (0);
+		return (Z_LOW_LIM);
 	if (vm_page_zero_count >= ZIDLE_HI(vmstats.v_free_count))
-		return (0);
-	return (1);
+		return (Z_HIGH_LIM);
+	return (Z_ZERO);
 }
 
 static void
@@ -121,6 +129,7 @@
 	char *pg = NULL;
 	int npages = 0;
 	int i = 0;
+	enum zero_check_state zs;
 
 	/*
 	 * Adjust thread parameters before entering our loop.  The thread
@@ -132,6 +141,7 @@
 	rel_mplock();
 	lwkt_setpri_self(TDPRI_IDLE_WORK);
 	lwkt_setcpu_self(globaldata_find(ncpus - 1));
+	sleep_time = DEFAULT_SLEEP_TIME;
 
 	/*
 	 * Loop forever
@@ -142,9 +152,35 @@
 			/*
 			 * Wait for work.
 			 */
-			tsleep(&zero_state, 0, "pgzero", hz / 10);
-			if (vm_page_zero_check())
+			tsleep(&zero_state, 0, "pgzero", sleep_time);
+			/*
+			 * Check whether we ought zero pages - if we are
+			 * disabled or there are plenty of zero pages, we
+			 * sleep, for a long time.
+			 */
+			zs = vm_page_zero_check();
+			if (zs == Z_DISABLED) {
+				sleep_time = LONG_SLEEP_TIME;
+				break;
+			}
+			switch(zs) {
+			case Z_ZERO:
+				sleep_time = DEFAULT_SLEEP_TIME;
 				npages = idlezero_rate / 10;
+				zero_state = 0;
+				break;
+			case Z_HIGH_LIM:
+				sleep_time = LONG_SLEEP_TIME;
+				zero_state = 1;
+				break;
+			case Z_LOW_LIM:
+				sleep_time = DEFAULT_SLEEP_TIME;
+				break;
+			default:
+				/* Z_DISABLED handled above switch */
+				break;
+			}
+
 			if (npages)
 				state = STATE_GET_PAGE;	/* Fallthrough */
 			break;
@@ -193,7 +229,7 @@
 				vm_page_flag_set(m, PG_ZERO);
 				vm_page_free_toq(m);
 				state = STATE_GET_PAGE;
-				++idlezero_count;
+				++cnt_prezero;
 				rel_mplock();
 			}
 			break;




More information about the Kernel mailing list