tsleep/wakeup patch #1

Matthew Dillon dillon at apollo.backplane.com
Tue Nov 8 22:34:33 PST 2005


    Here is stage 2, a patch relative to the latest HEAD.  It makes tsleep
    and wakeup MP safe and cleans up a huge chunk of the process scheduling
    code.  I was even able to get rid of a bunch of code sitting in the
    critical process switching path (mi_switch() is completely gone now).

    Clearly it needs a lot of testing before I commit, so I'm going to
    give it until Friday.  The kinds of testing needed:

	* Creating and destroying lots of processes.  e.g. buildworlds
	* Hitting ^C, ^Z, etc... things that generate signals and
	  change the process state.

    This patch will make tsleep() and wakeup() MP safe when used with 
    kernel threads.  The functions are still not going to be MP safe when
    used with processes (and won't be until after the release).
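
    To recap the API being hardened, the basic tsleep/wakeup pattern for
    a kernel thread looks like this (a usage sketch only; my_event is a
    made-up wait channel, not code from this patch):

	static int my_event;	/* hypothetical wait channel */

	/* consumer thread: block until the event fires */
	while (my_event == 0)
		tsleep(&my_event, 0, "mywait", 0);

	/* producer (eventually callable from any cpu): post the event
	 * and wake any threads sleeping on the channel */
	my_event = 1;
	wakeup(&my_event);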

    Once I get this piece banged into shape I will make the EM ethernet
    device driver and its interrupt thread MP safe. 

					-Matt


Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.38
diff -u -r1.38 kern_sig.c
--- kern/kern_sig.c	11 Oct 2005 09:59:56 -0000	1.38
+++ kern/kern_sig.c	9 Nov 2005 06:10:19 -0000
@@ -914,17 +914,18 @@
 			 * it has no further action.  If SIGCONT is held, we
 			 * continue the process and leave the signal in
 			 * p_siglist.  If the process catches SIGCONT, let it
-			 * handle the signal itself.  If it isn't waiting on
-			 * an event, then it goes back to run state.
-			 * Otherwise, process goes back to sleep state.
+			 * handle the signal itself.
 			 */
 			if (action == SIG_DFL)
 				SIGDELSET(p->p_siglist, sig);
 			if (action == SIG_CATCH)
 				goto run;
-			if (p->p_wchan == 0)
-				goto run;
-			clrrunnable(p, SSLEEP);
+
+			/*
+			 * Make runnable but do not break a tsleep unless
+			 * some other signal was pending.
+			 */
+			setrunnable(p);
 			goto out;
 		}
 
@@ -938,13 +939,10 @@
 		}
 
 		/*
-		 * If process is sleeping interruptibly, then simulate a
-		 * wakeup so that when it is continued, it will be made
-		 * runnable and can look at the signal.  But don't make
-		 * the process runnable, leave it stopped.
+		 * The process is sleeping interruptibly but is stopped;
+		 * just set the P_BREAKTSLEEP flag.
 		 */
-		if (p->p_wchan && (p->p_flag & P_SINTR))
-			unsleep(p->p_thread);
+		p->p_flag |= P_BREAKTSLEEP;
 		goto out;
 	default:
 		/*
@@ -996,6 +994,10 @@
 	}
 	/*NOTREACHED*/
 run:
+	/*
+	 * Make runnable and break out of any tsleep as well.
+	 */
+	p->p_flag |= P_BREAKTSLEEP;
 	setrunnable(p);
 out:
 	crit_exit();
@@ -1244,8 +1246,9 @@
 			psignal(p->p_pptr, SIGCHLD);
 			do {
 				stop(p);
-				mi_switch(p);
-			} while (!trace_req(p) && p->p_flag & P_TRACED);
+				lwkt_deschedule_self(p->p_thread);
+				lwkt_switch();
+			} while (!trace_req(p) && (p->p_flag & P_TRACED));
 
 			/*
 			 * If parent wants us to take the signal,
@@ -1323,7 +1326,10 @@
 				stop(p);
 				if ((p->p_pptr->p_procsig->ps_flag & PS_NOCLDSTOP) == 0)
 					psignal(p->p_pptr, SIGCHLD);
-				mi_switch(p);
+				if (p->p_stat == SSTOP) {
+					lwkt_deschedule_self(p->p_thread);
+					lwkt_switch();
+				}
 				break;
 			} else if (prop & SA_IGNORE) {
 				/*
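
    For reference, the contract the kern_sig.c changes establish boils
    down to the following (a paraphrase of the hunks above and of the
    setrunnable() rewrite further down, not additional code):

	/* psignal(): an event arrives for a sleeping/stopped process */
	p->p_flag |= P_BREAKTSLEEP;	/* remember that an event occurred */
	if (p->p_stat != SSTOP)
		setrunnable(p);		/* stopped processes stay stopped */

	/* setrunnable(): only break the tsleep if an event was recorded */
	if (p->p_thread->td_flags & TDF_TSLEEPQ) {
		p->p_stat = SSLEEP;
		if (p->p_flag & P_BREAKTSLEEP) {
			p->p_flag &= ~P_BREAKTSLEEP;
			unsleep_and_wakeup_thread(p->p_thread);
		}
	} else {
		p->p_stat = SRUN;
		p->p_flag &= ~P_BREAKTSLEEP;
		lwkt_schedule(p->p_thread);
	}
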
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.52
diff -u -r1.52 kern_synch.c
--- kern/kern_synch.c	9 Nov 2005 03:39:15 -0000	1.52
+++ kern/kern_synch.c	9 Nov 2005 06:08:18 -0000
@@ -68,6 +68,7 @@
 
 int	hogticks;
 int	lbolt;
+int	lbolt_syncer;
 int	sched_quantum;		/* Roundrobin scheduling quantum in ticks. */
 int	ncpus;
 int	ncpus2, ncpus2_shift, ncpus2_mask;
@@ -157,14 +158,14 @@
 static void
 schedcpu(void *arg)
 {
+	struct rlimit *rlim;
 	struct proc *p;
+	u_int64_t ttime;
 
+	/*
+	 * General process statistics once a second
+	 */
 	FOREACH_PROC_IN_SYSTEM(p) {
-		/*
-		 * Increment time in/out of memory and sleep time
-		 * (if sleeping).  We ignore overflow; with 16-bit int's
-		 * (remember them?) overflow takes 45 days.
-		 */
 		crit_enter();
 		p->p_swtime++;
 		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
@@ -181,7 +182,43 @@
 		}
 		crit_exit();
 	}
+
+	/*
+	 * Resource checks.  XXX break out since psignal/killproc can block,
+	 * limiting us to one process killed per second.  There is probably
+	 * a better way.
+	 */
+	FOREACH_PROC_IN_SYSTEM(p) {
+		crit_enter();
+		if (p->p_stat == SZOMB || 
+		    p->p_limit == NULL || 
+		    p->p_thread == NULL
+		) {
+			crit_exit();
+			continue;
+		}
+		ttime = p->p_thread->td_sticks + p->p_thread->td_uticks;
+		if (p->p_limit->p_cpulimit != RLIM_INFINITY &&
+		    ttime > p->p_limit->p_cpulimit
+		) {
+			rlim = &p->p_rlimit[RLIMIT_CPU];
+			if (ttime / (rlim_t)1000000 >= rlim->rlim_max) {
+				killproc(p, "exceeded maximum CPU limit");
+			} else {
+				psignal(p, SIGXCPU);
+				if (rlim->rlim_cur < rlim->rlim_max) {
+					/* XXX: we should make a private copy */
+					rlim->rlim_cur += 5;
+				}
+			}
+			crit_exit();
+			break;
+		}
+		crit_exit();
+	}
+
 	wakeup((caddr_t)&lbolt);
+	wakeup((caddr_t)&lbolt_syncer);
 	callout_reset(&schedcpu_callout, hz, schedcpu, NULL);
 }
 
@@ -234,11 +271,8 @@
 
 		gd->gd_tsleep_hash = slpque_cpu0;
 	} else {
-#if 0
 		gd->gd_tsleep_hash = malloc(sizeof(slpque_cpu0), 
 					    M_TSLEEP, M_WAITOK | M_ZERO);
-#endif
-		gd->gd_tsleep_hash = slpque_cpu0;
 	}
 	for (i = 0; i < TABLESIZE; ++i)
 		TAILQ_INIT(&gd->gd_tsleep_hash[i]);
@@ -267,8 +301,10 @@
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;		/* may be NULL */
 	globaldata_t gd;
-	int sig = 0, catch = flags & PCATCH;
-	int id = LOOKUP(ident);
+	int sig;
+	int catch;
+	int id;
+	int error;
 	int oldpri;
 	struct callout thandle;
 
@@ -292,15 +328,50 @@
 	}
 	gd = td->td_gd;
 	KKASSERT(td != &gd->gd_idlethread);	/* you must be kidding! */
+
+	/*
+	 * NOTE: all of this occurs on the current cpu, including any
+	 * callout-based wakeups, so a critical section is a sufficient
+	 * interlock.
+	 *
+	 * The entire sequence through to where we actually sleep must
+	 * run without breaking the critical section.
+	 */
+	id = LOOKUP(ident);
+	catch = flags & PCATCH;
+	error = 0;
+	sig = 0;
+
 	crit_enter_quick(td);
+
 	KASSERT(ident != NULL, ("tsleep: no ident"));
 	KASSERT(p == NULL || p->p_stat == SRUN, ("tsleep %p %s %d",
 		ident, wmesg, p->p_stat));
 
-	td->td_wchan = ident;
-	td->td_wmesg = wmesg;
-	td->td_wdomain = flags & PDOMAIN_MASK;
+	/*
+	 * Setup for the current process (if this is a process). 
+	 */
 	if (p) {
+		if (catch) {
+			/*
+			 * Early termination if PCATCH was set and a
+			 * signal is pending, interlocked with the
+			 * critical section.
+			 */
+			if ((sig = CURSIG(p)))
+				goto resume;
+
+			/*
+			 * Causes psignal to wake us up when a signal is posted.
+			 */
+			p->p_flag |= P_SINTR;
+		}
+
+		/*
+		 * Make sure the current process has been untangled from
+		 * the userland scheduler and initialize slptime to start
+		 * counting.
+		 */
 		if (flags & PNORESCHED)
 			td->td_flags |= TDF_NORESCHED;
 		p->p_usched->release_curproc(&p->p_lwp);
@@ -308,99 +379,106 @@
 	}
 
 	/*
-	 * note: all of this occurs on the current cpu, including any
-	 * callout-based wakeups, so a critical section is a sufficient
-	 * interlock.
+	 * Move our thread to the correct queue and setup our wchan, etc.
 	 */
 	lwkt_deschedule_self(td);
+	td->td_flags |= TDF_TSLEEPQ;
 	TAILQ_INSERT_TAIL(&gd->gd_tsleep_hash[id], td, td_threadq);
 	atomic_set_int(&slpque_cpumasks[id], gd->gd_cpumask);
+
+	td->td_wchan = ident;
+	td->td_wmesg = wmesg;
+	td->td_wdomain = flags & PDOMAIN_MASK;
+
+	/*
+	 * Setup the timeout, if any
+	 */
 	if (timo) {
 		callout_init(&thandle);
 		callout_reset(&thandle, timo, endtsleep, td);
 	}
+
 	/*
-	 * We put ourselves on the sleep queue and start our timeout
-	 * before calling CURSIG, as we could stop there, and a wakeup
-	 * or a SIGCONT (or both) could occur while we were stopped.
-	 * A SIGCONT would cause us to be marked as SSLEEP
-	 * without resuming us, thus we must be ready for sleep
-	 * when CURSIG is called.  If the wakeup happens while we're
-	 * stopped, td->td_wchan will be 0 upon return from CURSIG.
+	 * Beddy bye bye.
 	 */
 	if (p) {
-		if (catch) {
-			p->p_flag |= P_SINTR;
-			if ((sig = CURSIG(p))) {
-				if (td->td_wchan) {
-					unsleep(td);
-					lwkt_schedule_self(td);
-				}
-				p->p_stat = SRUN;
-				goto resume;
-			}
-			if (td->td_wchan == NULL) {
-				catch = 0;
-				goto resume;
-			}
-		} else {
-			sig = 0;
-		}
-
 		/*
-		 * If we are not the current process we have to remove ourself
-		 * from the run queue.
+		 * Ok, we are sleeping.  Remove us from the userland runq.
+		 * If we are in SRUN, change us to SSLEEP.  Note that we
+		 * could be in SSTOP here.
 		 */
-		KASSERT(p->p_stat == SRUN, ("PSTAT NOT SRUN %d %d", p->p_pid, p->p_stat));
+		if (p->p_stat == SRUN) {
+			if (p->p_flag & P_ONRUNQ)
+				p->p_usched->remrunqueue(&p->p_lwp);
+			p->p_stat = SSLEEP;
+		}
+		p->p_stats->p_ru.ru_nvcsw++;
+		lwkt_switch();
+
 		/*
-		 * If this is the current 'user' process schedule another one.
+		 * Switch us back to SRUN, but only if we are in SSLEEP. 
+		 * We could wind up in SSTOP (in which case it will be
+		 * handled on return-to-user mode).
 		 */
-		clrrunnable(p, SSLEEP);
-		p->p_stats->p_ru.ru_nvcsw++;
-		mi_switch(p);
-		KASSERT(p->p_stat == SRUN, ("tsleep: stat not srun"));
+		if (p->p_stat == SSLEEP)
+			p->p_stat = SRUN;
+		p->p_flag &= ~P_SINTR;		/* clean up temporary flag */
 	} else {
 		lwkt_switch();
 	}
+
 	/* 
 	 * Make sure we haven't switched cpus while we were asleep.  It's
-	 * not supposed to happen.
+	 * not supposed to happen.  Cleanup our temporary flags.
 	 */
 	KKASSERT(gd == td->td_gd);
-resume:
-	if (p)
-		p->p_flag &= ~P_SINTR;
-	crit_exit_quick(td);
 	td->td_flags &= ~TDF_NORESCHED;
-	if (td->td_flags & TDF_TIMEOUT) {
-		td->td_flags &= ~TDF_TIMEOUT;
-		if (sig == 0)
-			return (EWOULDBLOCK);
-	} else if (timo) {
-		callout_stop(&thandle);
-	} else if (td->td_wmesg) {
-		/*
-		 * This can happen if a thread is woken up directly.  Clear
-		 * wmesg to avoid debugging confusion.
-		 */
-		td->td_wmesg = NULL;
+
+	/*
+	 * Cleanup the timeout.
+	 */
+	if (timo) {
+		if (td->td_flags & TDF_TIMEOUT) {
+			td->td_flags &= ~TDF_TIMEOUT;
+			if (sig == 0)
+				error = EWOULDBLOCK;
+		} else {
+			callout_stop(&thandle);
+		}
 	}
-	/* inline of iscaught() */
+
+	/*
+	 * Since td_threadq is used both for our run queue AND for the
+	 * tsleep hash queue, we can't still be on it at this point because
+	 * we've gotten the cpu back.
+	 */
+	KKASSERT((td->td_flags & TDF_TSLEEPQ) == 0);
+	td->td_wchan = NULL;
+	td->td_wmesg = NULL;
+	td->td_wdomain = 0;
+
+	/*
+	 * Figure out the correct error return
+	 */
+resume:
 	if (p) {
-		if (catch && (sig != 0 || (sig = CURSIG(p)))) {
+		if (catch && error == 0 && (sig != 0 || (sig = CURSIG(p)))) {
 			if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
-				return (EINTR);
-			return (ERESTART);
+				error = EINTR;
+			else
+				error = ERESTART;
 		}
 	}
-	return (0);
+	crit_exit_quick(td);
+	return (error);
 }
 
 /*
  * Implement the timeout for tsleep.  We interlock against
- * wchan when setting TDF_TIMEOUT.  For processes we remove
- * the sleep if the process is stopped rather then sleeping,
- * so it remains stopped.
+ * wchan when setting TDF_TIMEOUT.
+ *
+ * We set P_BREAKTSLEEP to indicate that an event has occurred, but
+ * we only call setrunnable if the process is not stopped.
  *
  * This type of callout timeout had better be scheduled on the same
  * cpu the process is sleeping on.
@@ -414,37 +492,38 @@
 	crit_enter();
 	if (td->td_wchan) {
 		td->td_flags |= TDF_TIMEOUT;
+
 		if ((p = td->td_proc) != NULL) {
-			if (p->p_stat == SSLEEP)
+			p->p_flag |= P_BREAKTSLEEP;
+			if (p->p_stat != SSTOP)
 				setrunnable(p);
-			else
-				unsleep(td);
-		} else {
-			unsleep(td);
-			lwkt_schedule(td);
+		} else if (td->td_flags & TDF_TSLEEPQ) {
+			unsleep_and_wakeup_thread(td);
 		}
 	}
 	crit_exit();
 }
 
-/*
- * Remove a process from its wait queue
- *
- * XXX not MP safe until called only on the cpu holding the sleeping
- * process.
- */
 void
-unsleep(struct thread *td)
+unsleep_and_wakeup_thread(struct thread *td)
 {
+	globaldata_t gd = mycpu;
 	int id;
 
+#ifdef SMP
+	if (td->td_gd != gd) {
+		lwkt_send_ipiq(td->td_gd, (ipifunc1_t)unsleep_and_wakeup_thread, td);
+		return;
+	}
+#endif
 	crit_enter();
-	id = LOOKUP(td->td_wchan);
-	if (td->td_wchan) {
+	if (td->td_flags & TDF_TSLEEPQ) {
+		td->td_flags &= ~TDF_TSLEEPQ;
+		id = LOOKUP(td->td_wchan);
 		TAILQ_REMOVE(&td->td_gd->gd_tsleep_hash[id], td, td_threadq);
 		if (TAILQ_FIRST(&td->td_gd->gd_tsleep_hash[id]) == NULL)
 			atomic_clear_int(&slpque_cpumasks[id], td->td_gd->gd_cpumask);
-		td->td_wchan = NULL;
+		lwkt_schedule(td);
 	}
 	crit_exit();
 }
@@ -455,6 +534,8 @@
  *
  * The domain encodes the sleep/wakeup domain AND the first cpu to check
  * (which is always the current cpu).  As we iterate across cpus
+ *
+ * MPSAFE!  MAY BE CALLED WITHOUT THE BGL OR VIA IPI!
  */
 static void
 _wakeup(void *ident, int domain)
@@ -464,14 +545,12 @@
 	struct thread *ntd;
 	globaldata_t gd;
 	struct proc *p;
-#if 0
 #ifdef SMP
 	cpumask_t mask;
 	cpumask_t tmask;
 	int startcpu;
 	int nextcpu;
 #endif
-#endif
 	int id;
 
 	crit_enter();
@@ -484,14 +563,14 @@
 		if (td->td_wchan == ident && 
 		    td->td_wdomain == (domain & PDOMAIN_MASK)
 		) {
+			KKASSERT(td->td_flags & TDF_TSLEEPQ);
+			td->td_flags &= ~TDF_TSLEEPQ;
 			TAILQ_REMOVE(qp, td, td_threadq);
 			if (TAILQ_FIRST(qp) == NULL) {
 				atomic_clear_int(&slpque_cpumasks[id],
 						 gd->gd_cpumask);
 			}
-			td->td_wchan = NULL;
 			if ((p = td->td_proc) != NULL && p->p_stat == SSLEEP) {
-				p->p_stat = SRUN;
 				if (p->p_flag & P_INMEM) {
 					/*
 					 * LWKT scheduled now, there is no
@@ -514,7 +593,6 @@
 		}
 	}
 
-#if 0
 #ifdef SMP
 	/*
 	 * We finished checking the current cpu but there still may be
@@ -594,7 +672,6 @@
 		}
 	}
 #endif
-#endif
 done:
 	crit_exit();
 }
@@ -626,94 +703,41 @@
 }
 
 /*
- * The machine independent parts of mi_switch().
+ * setrunnable()
  *
- * 'p' must be the current process.
- */
-void
-mi_switch(struct proc *p)
-{
-	thread_t td = p->p_thread;
-	struct rlimit *rlim;
-	u_int64_t ttime;
-
-	KKASSERT(td == mycpu->gd_curthread);
-
-	crit_enter_quick(td);
-
-	/*
-	 * Check if the process exceeds its cpu resource allocation.
-	 * If over max, kill it.  Time spent in interrupts is not 
-	 * included.  YYY 64 bit match is expensive.  Ick.
-	 *
-	 * XXX move to the once-a-second process scan
-	 */
-	ttime = td->td_sticks + td->td_uticks;
-	if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
-	    ttime > p->p_limit->p_cpulimit) {
-		rlim = &p->p_rlimit[RLIMIT_CPU];
-		if (ttime / (rlim_t)1000000 >= rlim->rlim_max) {
-			killproc(p, "exceeded maximum CPU limit");
-		} else {
-			psignal(p, SIGXCPU);
-			if (rlim->rlim_cur < rlim->rlim_max) {
-				/* XXX: we should make a private copy */
-				rlim->rlim_cur += 5;
-			}
-		}
-	}
-
-	/*
-	 * If we are in a SSTOPped state we deschedule ourselves.  
-	 * YYY this needs to be cleaned up, remember that LWKTs stay on
-	 * their run queue which works differently then the user scheduler
-	 * which removes the process from the runq when it runs it.
-	 */
-	mycpu->gd_cnt.v_swtch++;
-	if (p->p_stat == SSTOP)
-		lwkt_deschedule_self(td);
-	lwkt_switch();
-	crit_exit_quick(td);
-}
-
-/*
- * Change process state to be runnable, placing it on the run queue if it
- * is in memory, and awakening the swapper if it isn't in memory.
+ * Set us to the SRUN or SSLEEP state depending on whether we are in a
+ * tsleep or not.  If P_BREAKTSLEEP is set, unsleep the process's thread.
+ *
+ * This function only has an effect if the process is currently in SSLEEP
+ * or SSTOP.
  *
- * This operation MUST OCCUR on the cpu that the thread is sleeping on.
+ * Note that tsleep() is responsible for switching us from SSLEEP to SRUN.
+ *
+ * The MP lock must be held on call.  Note that there is a race against
+ * the TDF_TSLEEPQ check between everyone calling setrunnable() and the
+ * callout timer.   At the moment, all are holding the BGL so we should
+ * be ok.  This function may be called from any cpu.
  */
 void
 setrunnable(struct proc *p)
 {
 	crit_enter();
-
-	switch (p->p_stat) {
-	case 0:
-	case SRUN:
-	case SZOMB:
-	default:
-		panic("setrunnable");
-	case SSTOP:
-	case SSLEEP:
-		unsleep(p->p_thread);	/* e.g. when sending signals */
-		break;
-
-	case SIDL:
-		break;
-	}
-	p->p_stat = SRUN;
-
-	/*
-	 * The process is controlled by LWKT at this point, we do not mess
-	 * around with the userland scheduler until the thread tries to 
-	 * return to user mode.  We do not clear p_slptime or call
-	 * setrunqueue().
-	 */
-	if (p->p_flag & P_INMEM) {
-		lwkt_schedule(p->p_thread);
-	} else {
-		p->p_flag |= P_SWAPINREQ;
-		wakeup((caddr_t)&proc0);
+	if (p->p_stat == SSLEEP || p->p_stat == SSTOP) {
+		if (p->p_thread->td_flags & TDF_TSLEEPQ) {
+			p->p_stat = SSLEEP;
+			if (p->p_flag & P_BREAKTSLEEP) {
+				p->p_flag &= ~P_BREAKTSLEEP;
+				unsleep_and_wakeup_thread(p->p_thread);
+			}
+			/*
+			 * XXX handle P_INMEM in doreti ?  trap code ?
+			 * XXX set P_SWAPINREQ / wakeup (&proc0) ?
+			 */
+		} else {
+			p->p_stat = SRUN;
+			p->p_flag &= ~P_BREAKTSLEEP;
+			lwkt_schedule(p->p_thread);
+		}
 	}
 	crit_exit();
 }
@@ -752,20 +776,6 @@
 }
 
 /*
- * Change the process state to NOT be runnable, removing it from the run
- * queue.
- */
-void
-clrrunnable(struct proc *p, int stat)
-{
-	crit_enter_quick(p->p_thread);
-	if (p->p_stat == SRUN && (p->p_flag & P_ONRUNQ))
-		p->p_usched->remrunqueue(&p->p_lwp);
-	p->p_stat = stat;
-	crit_exit_quick(p->p_thread);
-}
-
-/*
  * Compute a tenex style load average of a quantity on
  * 1, 5 and 15 minute intervals.
  */
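
    A sketch of the per-cpu sleep queue bookkeeping everything above
    relies on (assuming the pre-existing TABLESIZE/LOOKUP hash macros):
    each cpu keeps its own hash of sleeping threads, and
    slpque_cpumasks[] tells wakeup() which other cpus might be holding
    sleepers for a given hash chain.

	/* tsleep: enqueue on the local cpu's hash chain */
	id = LOOKUP(ident);
	td->td_flags |= TDF_TSLEEPQ;
	TAILQ_INSERT_TAIL(&gd->gd_tsleep_hash[id], td, td_threadq);
	atomic_set_int(&slpque_cpumasks[id], gd->gd_cpumask);

	/*
	 * wakeup: scan the local chain first, then use
	 * slpque_cpumasks[id] to chase down only those cpus which may
	 * have matching sleepers.
	 */
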
Index: kern/lwkt_thread.c
===================================================================
RCS file: /cvs/src/sys/kern/lwkt_thread.c,v
retrieving revision 1.85
diff -u -r1.85 lwkt_thread.c
--- kern/lwkt_thread.c	8 Nov 2005 22:38:43 -0000	1.85
+++ kern/lwkt_thread.c	9 Nov 2005 05:30:37 -0000
@@ -146,7 +146,7 @@
 void
 _lwkt_enqueue(thread_t td)
 {
-    if ((td->td_flags & (TDF_RUNQ|TDF_MIGRATING)) == 0) {
+    if ((td->td_flags & (TDF_RUNQ|TDF_MIGRATING|TDF_TSLEEPQ|TDF_BLOCKQ)) == 0) {
 	int nq = td->td_pri & TDPRI_MASK;
 	struct globaldata *gd = td->td_gd;
 
@@ -170,10 +170,6 @@
     KASSERT(td->td_wait == NULL, ("lwkt_schedule_self(): td_wait not NULL!"));
     KASSERT(td != &td->td_gd->gd_idlethread, ("lwkt_schedule_self(): scheduling gd_idlethread is illegal!"));
     _lwkt_enqueue(td);
-#ifdef _KERNEL
-    if (td->td_proc && td->td_proc->p_stat == SSLEEP)
-	panic("SCHED SELF PANIC");
-#endif
     crit_exit_quick(td);
 }
 
@@ -665,10 +661,12 @@
 		    ntd->td_flags |= TDF_IDLE_NOHLT;
 		    goto using_idle_thread;
 		} else {
+		    ++gd->gd_cnt.v_swtch;
 		    TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
 		    TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
 		}
 	    } else {
+		++gd->gd_cnt.v_swtch;
 		TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
 		TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
 	    }
@@ -677,6 +675,7 @@
 	     * THREAD SELECTION FOR A UP MACHINE BUILD.  We don't have to
 	     * worry about tokens or the BGL.
 	     */
+	    ++gd->gd_cnt.v_swtch;
 	    TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
 	    TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
 #endif
@@ -973,22 +972,7 @@
 {
     globaldata_t mygd = mycpu;
 
-#ifdef	INVARIANTS
     KASSERT(td != &td->td_gd->gd_idlethread, ("lwkt_schedule(): scheduling gd_idlethread is illegal!"));
-    if ((td->td_flags & TDF_PREEMPT_LOCK) == 0 && td->td_proc 
-	&& td->td_proc->p_stat == SSLEEP
-    ) {
-	printf("PANIC schedule curtd = %p (%d %d) target %p (%d %d)\n",
-	    curthread,
-	    curthread->td_proc ? curthread->td_proc->p_pid : -1,
-	    curthread->td_proc ? curthread->td_proc->p_stat : -1,
-	    td,
-	    td->td_proc ? td->td_proc->p_pid : -1,
-	    td->td_proc ? td->td_proc->p_stat : -1
-	);
-	panic("SCHED PANIC");
-    }
-#endif
     crit_enter_gd(mygd);
     if (td == mygd->gd_curthread) {
 	_lwkt_enqueue(td);
@@ -1276,17 +1260,16 @@
     lwkt_gettoken(&ilock, &w->wa_token);
     crit_enter();
     if (w->wa_gen == *gen) {
+again:
 	_lwkt_dequeue(td);
+	td->td_flags |= TDF_BLOCKQ;
 	TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
 	++w->wa_count;
 	td->td_wait = w;
 	td->td_wmesg = wmesg;
-    again:
 	lwkt_switch();
-	if (td->td_wmesg != NULL) {
-	    _lwkt_dequeue(td);
-	    goto again;
-	}
+	KKASSERT((td->td_flags & TDF_BLOCKQ) == 0);
+	td->td_wmesg = NULL;
     }
     crit_exit();
     *gen = w->wa_gen;
@@ -1315,9 +1298,10 @@
     while ((td = TAILQ_FIRST(&w->wa_waitq)) != NULL && count) {
 	--count;
 	--w->wa_count;
+	KKASSERT(td->td_flags & TDF_BLOCKQ);
 	TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
+	td->td_flags &= ~TDF_BLOCKQ;
 	td->td_wait = NULL;
-	td->td_wmesg = NULL;
 #ifdef SMP
 	if (td->td_gd == mycpu) {
 	    _lwkt_enqueue(td);
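
    The lwkt_wait side now follows the same one-queue-at-a-time
    discipline.  Condensed from the two hunks above, the blocker/waker
    handshake works like this:

	/* blocker: move off the run queue onto the block queue */
	_lwkt_dequeue(td);
	td->td_flags |= TDF_BLOCKQ;
	TAILQ_INSERT_TAIL(&w->wa_waitq, td, td_threadq);
	lwkt_switch();
	/* only the waker clears TDF_BLOCKQ, so this must hold on return */
	KKASSERT((td->td_flags & TDF_BLOCKQ) == 0);

	/* waker: dequeue, clear the flag, then reschedule the thread */
	TAILQ_REMOVE(&w->wa_waitq, td, td_threadq);
	td->td_flags &= ~TDF_BLOCKQ;
	lwkt_schedule(td);
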
Index: kern/sys_generic.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.22
diff -u -r1.22 sys_generic.c
--- kern/sys_generic.c	22 Jun 2005 01:33:21 -0000	1.22
+++ kern/sys_generic.c	9 Nov 2005 05:01:39 -0000
@@ -977,12 +977,17 @@
 	if (p != NULL) {
 		crit_enter();
 		if (p->p_wchan == (caddr_t)&selwait) {
-			if (p->p_stat == SSLEEP)
+			/*
+			 * Flag the process to break the tsleep when 
+			 * setrunnable is called, but only call setrunnable
+			 * here if the process is not in a stopped state.
+			 */
+			p->p_flag |= P_BREAKTSLEEP;
+			if (p->p_stat != SSTOP)
 				setrunnable(p);
-			else
-				unsleep(p->p_thread);
-		} else if (p->p_flag & P_SELECT)
+		} else if (p->p_flag & P_SELECT) {
 			p->p_flag &= ~P_SELECT;
+		}
 		crit_exit();
 	}
 }
Index: kern/sys_process.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_process.c,v
retrieving revision 1.17
diff -u -r1.17 sys_process.c
--- kern/sys_process.c	27 Oct 2005 03:15:47 -0000	1.17
+++ kern/sys_process.c	9 Nov 2005 05:01:56 -0000
@@ -438,10 +438,14 @@
 		}
 
 	sendsig:
-		/* deliver or queue signal */
+		/*
+		 * Deliver or queue signal.  If the process is stopped
+		 * force it to SRUN again.
+		 */
 		crit_enter();
 		if (p->p_stat == SSTOP) {
 			p->p_xstat = data;
+			p->p_flag |= P_BREAKTSLEEP;
 			setrunnable(p);
 		} else if (data) {
 			psignal(p, data);
Index: kern/vfs_sync.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_sync.c,v
retrieving revision 1.7
diff -u -r1.7 vfs_sync.c
--- kern/vfs_sync.c	17 Sep 2005 07:43:00 -0000	1.7
+++ kern/vfs_sync.c	9 Nov 2005 05:58:58 -0000
@@ -266,7 +266,7 @@
 		 * filesystem activity.
 		 */
 		if (time_second == starttime)
-			tsleep(&lbolt, 0, "syncer", 0);
+			tsleep(&lbolt_syncer, 0, "syncer", 0);
 	}
 }
 
@@ -280,12 +280,11 @@
 int
 speedup_syncer(void)
 {
-	crit_enter();
-	if (updatethread->td_wchan == &lbolt) { /* YYY */
-		unsleep(updatethread);
-		lwkt_schedule(updatethread);
-	}
-	crit_exit();
+	/*
+	 * Don't bother protecting the test.  wakeup() will only do
+	 * something real if the thread is in the right state.
+	 */
+	wakeup(&lbolt_syncer);
 	if (rushjob < syncdelay / 2) {
 		rushjob += 1;
 		stat_rush_requests += 1;
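
    Boiled down, the vfs_sync.c change means the syncer sleeps on its
    own channel and anyone can kick it early with a plain wakeup():

	/* syncer thread: wait for the ~1hz kick (or an early one) */
	tsleep(&lbolt_syncer, 0, "syncer", 0);

	/* schedcpu() once a second, or speedup_syncer() on demand */
	wakeup(&lbolt_syncer);
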
Index: sys/kernel.h
===================================================================
RCS file: /cvs/src/sys/sys/kernel.h,v
retrieving revision 1.17
diff -u -r1.17 kernel.h
--- sys/kernel.h	10 Jun 2005 23:59:33 -0000	1.17
+++ sys/kernel.h	9 Nov 2005 05:58:37 -0000
@@ -76,6 +76,7 @@
 extern int profhz;			/* profiling clock's frequency */
 extern int ticks;
 extern int lbolt;			/* once a second sleep address */
+extern int lbolt_syncer;		/* approx 1 hz but may be sped up */
 
 #endif /* _KERNEL */
 
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.71
diff -u -r1.71 proc.h
--- sys/proc.h	8 Nov 2005 20:47:02 -0000	1.71
+++ sys/proc.h	9 Nov 2005 05:54:41 -0000
@@ -315,6 +315,7 @@
 #define	P_ADVLOCK	0x00001	/* Process may hold a POSIX advisory lock. */
 #define	P_CONTROLT	0x00002	/* Has a controlling terminal. */
 #define	P_INMEM		0x00004	/* Loaded into memory. */
+#define P_BREAKTSLEEP	0x00008	/* Event pending, break tsleep on sigcont */
 #define	P_PPWAIT	0x00010	/* Parent is waiting for child to exec/exit. */
 #define	P_PROFIL	0x00020	/* Has started profiling. */
 #define P_SELECT	0x00040 /* Selecting; wakeup/waiting danger. */
@@ -457,14 +458,14 @@
 void	relscurproc(struct proc *curp);
 int	p_trespass (struct ucred *cr1, struct ucred *cr2);
 void	setrunnable (struct proc *);
-void	clrrunnable (struct proc *, int stat);
+void	clrrunnable (struct proc *);
 void	sleep_gdinit (struct globaldata *);
 int	suser (struct thread *td);
 int	suser_proc (struct proc *p);
 int	suser_cred (struct ucred *cred, int flag);
 void	cpu_heavy_switch (struct thread *);
 void	cpu_lwkt_switch (struct thread *);
-void	unsleep (struct thread *);
+void	unsleep_and_wakeup_thread(struct thread *);
 
 void	cpu_proc_exit (void) __dead2;
 void	cpu_thread_exit (void) __dead2;
Index: sys/thread.h
===================================================================
RCS file: /cvs/src/sys/sys/thread.h,v
retrieving revision 1.72
diff -u -r1.72 thread.h
--- sys/thread.h	8 Nov 2005 22:40:00 -0000	1.72
+++ sys/thread.h	9 Nov 2005 03:52:31 -0000
@@ -279,6 +279,11 @@
  * LWKT threads stay on their (per-cpu) run queue while running, not to
  * be confused with user processes which are removed from the user scheduling
  * run queue while actually running.
+ *
+ * td_threadq can represent the thread on one of three queues... the LWKT
+ * run queue, a tsleep queue, or an lwkt blocking queue.  The LWKT subsystem
+ * does not allow a thread to be scheduled if it already resides on some
+ * queue.
  */
 #define TDF_RUNNING		0x0001	/* thread still active */
 #define TDF_RUNQ		0x0002	/* on an LWKT run queue */
@@ -287,6 +292,7 @@
 #define TDF_IDLE_NOHLT		0x0010	/* we need to spin */
 #define TDF_MIGRATING		0x0020	/* thread is being migrated */
 #define TDF_SINTR		0x0040	/* interruptability hint for 'ps' */
+#define TDF_TSLEEPQ		0x0080	/* on a tsleep wait queue */
 
 #define TDF_SYSTHREAD		0x0100	/* system thread */
 #define TDF_ALLOCATED_THREAD	0x0200	/* zalloc allocated thread */
@@ -300,6 +306,7 @@
 #define TDF_NORESCHED		0x00020000	/* Do not reschedule on wake */
 #define TDF_BLOCKED		0x00040000	/* Thread is blocked */
 #define TDF_PANICWARN		0x00080000	/* panic warning in switch */
+#define TDF_BLOCKQ		0x00100000	/* on block queue */
 
 /*
  * Thread priorities.  Typically only one thread from any given
Index: vfs/procfs/procfs_ctl.c
===================================================================
RCS file: /cvs/src/sys/vfs/procfs/procfs_ctl.c,v
retrieving revision 1.7
diff -u -r1.7 procfs_ctl.c
--- vfs/procfs/procfs_ctl.c	2 May 2004 03:05:11 -0000	1.7
+++ vfs/procfs/procfs_ctl.c	9 Nov 2005 05:04:40 -0000
@@ -266,6 +266,11 @@
 		panic("procfs_control");
 	}
 
+	/*
+	 * If the process is in a stopped state, make it runnable again.
+	 * Do not set P_BREAKTSLEEP - that is, do not break a tsleep that
+	 * might be in progress.
+	 */
 	if (p->p_stat == SSTOP)
 		setrunnable(p);
 	return (0);
@@ -310,6 +315,10 @@
 #ifdef FIX_SSTEP
 				FIX_SSTEP(p);
 #endif
+				/*
+				 * Make the process runnable but do not
+				 * break its tsleep.
+				 */
 				setrunnable(p);
 			} else {
 				psignal(p, nm->nm_val);




