syscall messaging interface API
Matthew Dillon
dillon at apollo.backplane.com
Thu Jul 24 10:55:17 PDT 2003
:> Syscalls speed is critical if you use a hybrid user/kernel space thread
:> blocking/cond-var primitive to do 1:1 threading. Linux's "futex" mechanism
:> exploits and optimizes it to serve the "greater threading good". :)
:>
:> It shouldn't be ignored.
:
:Even in Linux, the contested case is rare. I've found that a
:*busy* login server handles on the order of 10,000 syscalls per
:second, while syscall overhead is on the order of 500 ns. The
:optimized syscall path in Linux 2.4.9 saves about 11% over FreeBSD
:4.X, at the expense of special cases for syscalls with more than
:four arguments and other kludges. So the potential improvement
:we're talking about here is probably well under 0.01%.
:
:Even if you did manage to come up with a program where ``syscall
:throughput'' is significant, batching the calls as Matt suggested
:would provide much better performance benefits than microoptimization.
Right. I really think batching is the solution to these sorts
of specialized situations. For example, if you had a massively
multi-threaded program doing hundreds of thousands of I/O ops a
second it would be quite reasonable for the threading system to
simply build up chains of syscall messages as the threads try to
run them and then dispatch them all to the kernel in one fell
swoop.
The absolute best cooperative-switching code I can write that's
still usable in a real threading system takes about 36ns to
switch between userland threads on a 1.2GHz P4 (around 46ns
on my 1GHz P3). I've included it below as an example. With
such low overhead this means that one can basically use a
userland threading abstraction to treat system calls as normal
blocking entities, but have the threading system queue the
system call messages and then dispatch a whole bunch at once
rather than running them one at a time.
-Matt
Matthew Dillon
<dillon at xxxxxxxxxxxxx>
/*
* sw.S
*/
/*
 * Register save/restore macros used by the switch code. Only the
 * i386 callee-saved registers (%ebx, %esi, %edi, %ebp) are
 * preserved across a cooperative switch; the C ABI already allows
 * the remaining registers to be clobbered by a function call.
 * Three variants are provided; exactly one is enabled via #if.
 */
.text
.globl qswitch, qrestore, qinit, qirestore, qstart
#if 0
/* variant 1 (disabled): pushal/popal save all eight GPRs */
#define PUSHAL pushal /* these are slow */
#define POPAL popal
#endif
#if 1
/* variant 2 (enabled): push/pop just the four preserved registers */
/* call-used only */
#define PUSHAL \
pushl %ebx ; \
pushl %esi ; \
pushl %edi ; \
pushl %ebp
#define POPAL \
popl %ebp ; \
popl %edi ; \
popl %esi ; \
popl %ebx
#endif
#if 0
/* variant 3 (disabled): same registers via explicit esp adjust + moves */
/* call-used only */
#define PUSHAL \
subl $16,%esp ; \
movl %ebx,(%esp) ; \
movl %esi,4(%esp) ; \
movl %edi,8(%esp) ; \
movl %ebp,12(%esp)
#define POPAL \
movl (%esp),%ebx ; \
movl 4(%esp),%esi ; \
movl 8(%esp),%edi ; \
movl 12(%esp),%ebp ; \
addl $16,%esp
#endif
/* qswitch(&olddesc, newdesc) */
/*
 * Cooperatively switch from the current thread to the thread whose
 * descriptor (saved stack pointer) is newdesc. The current thread's
 * callee-saved registers and a resume address (qrestore) are pushed
 * on its own stack, the resulting stack pointer is stored through
 * olddesc, and then the new thread's stack is adopted; the final
 * 'ret' jumps to whatever resume address sits on top of that stack
 * (qrestore for a previously switched-out thread, qirestore for a
 * freshly qinit'ed one).
 */
qswitch:
movl 8(%esp),%eax /* new state */
movl 4(%esp),%edx /* save old state */
PUSHAL /* save callee-saved regs on the old stack */
pushl $qrestore /* resume address for when we are switched back */
movl %esp,(%edx) /* *olddesc = current stack pointer */
movl %eax,%esp /* adopt the new thread's stack */
ret /* jump via the resume address on the new stack */
/* qstart(newdesc) */
/*
 * Enter the first thread: adopt newdesc as the stack pointer and
 * 'ret' into the resume address stored there. Unlike qswitch, the
 * caller's context is NOT saved, so qstart never returns to its
 * caller.
 */
qstart:
movl 4(%esp),%esp
ret
/*
 * Resume point pushed by qswitch: restore the callee-saved
 * registers from the stack and return to the instruction after the
 * thread's original qswitch call.
 */
qrestore:
POPAL
ret
/* newdesc = qinit(topofstk, func, arg) */
/*
 * Build the bootstrap frame for a new thread at the top of its
 * stack and return the descriptor (initial stack pointer) in %eax.
 * Frame layout relative to the returned pointer:
 *    0: qirestore  resume address consumed by qstart/qswitch
 *    4: func       entered when qirestore returns
 *    8: 0          fake return address for func (func must not return)
 *   12: arg        func's single argument, at 4(%esp) on entry
 */
qinit:
movl 4(%esp),%eax /* %eax = topofstk */
subl $16,%eax /* reserve the 4-word bootstrap frame */
movl $qirestore,0(%eax)
movl 8(%esp),%ecx /* %ecx = func */
movl %ecx,4(%eax)
movl $0,8(%eax) /* NULL return address: func must never return */
movl 12(%esp),%ecx /* %ecx = arg */
movl %ecx,12(%eax)
ret /* return eax */
/*
 * Resume point for a thread that has never run: there are no saved
 * registers to pop, so just 'ret' into func using the bootstrap
 * frame that qinit laid out.
 */
qirestore:
ret
/*
* swtest.c
*
* Test best-case userland thread switching speed
*/
/* context-switch primitives implemented in sw.S */
void qswitch(int **oldsw, int *newsw);
void qstart(int *newsw);
int *qinit(void *topstk, void *func, void *arg);
static void do_task1(void *arg);
static void do_task2(void *arg);
/* per-thread descriptors: each holds the thread's saved stack pointer */
int *task1;
int *task2;
/* 16 KB of stack per thread; the top (end) of each array is passed to qinit */
char stk1[16384];
char stk2[16384];
int count; /* switch iterations completed, shared by both threads */
/*
 * Build descriptors for the two benchmark threads and transfer
 * control to the first; qstart replaces the stack and never
 * returns, so execution ends via exit() inside do_task1.
 * NOTE(review): NULL is used with no visible #include (and exit()
 * elsewhere lacks <stdlib.h>) -- presumably tolerated by the 2003
 * compiler; verify before building with a modern toolchain.
 */
int
main(int ac, char **av)
{
/* stacks grow downward, so hand qinit the end of each stack array */
task1 = qinit(stk1 + sizeof(stk1), do_task1, NULL);
task2 = qinit(stk2 + sizeof(stk2), do_task2, NULL);
/*start_timing();*/
qstart(task1);
}
/*
 * Thread 1 body: hand control to task2, count the round trip, and
 * after ten million iterations end the benchmark.
 * NOTE(review): exit() is used without #include <stdlib.h> --
 * relies on an implicit declaration; confirm against the original
 * build environment.
 */
static void
do_task1(void *arg)
{
	while (1) {
		qswitch(&task1, task2);
		++count;
		if (count > 10000000) {
			/*stop_timing(count, "uthread_switch");*/
			exit(0);
		}
	}
}
/*
 * Thread 2 body: bump the shared counter and yield back to task1,
 * forever; do_task1 terminates the process when it has seen enough
 * iterations.
 */
static void
do_task2(void *arg)
{
	while (1) {
		count++;
		qswitch(&task2, task1);
	}
}
More information about the Kernel
mailing list