mlockall / munlockall patch
Venkatesh Srinivas
me at endeavour.zapto.org
Wed Nov 24 05:49:10 PST 2010
Hi,
This patch is the start of mlockall/munlockall support. It adds a
'flags' field to each vm_map to support mlockall(MCL_FUTURE) (an
approach taken from FreeBSD), and modifies mmap() and brk() to test for
that flag and wire in any newly obtained pages. It also implements
munlockall(). The code has been tested in a vkernel and seems to work
okay.
Questions:
1) What permissions do we want to check for mlockall()?
2) Currently, I read the vm_map flags under the per-map lock. This is
probably overkill for mmap and brk; should I read the value directly,
without taking the lock, instead?
3) In munlockall(), I've marked a spot with 'XXX' where it might be
possible to hit an in-transition map entry (one with
MAP_ENTRY_IN_TRANSITION set in entry->eflags). I don't understand the
places in the VM code where that flag is tested and the map lock is
released around the test. I didn't see any place where the flag is set
and the per-map lock is released afterwards; perhaps I'm missing
something?
4) Are automatic stack growth pages supposed to be affected by MCL_FUTURE?
5) Are pages from the 4.3BSD compat code supposed to be affected by MCL_FUTURE?
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 49f3a65..95253f1 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -486,6 +486,7 @@ vm_map_init(struct vm_map *map, vm_offset_t min,
vm_offset_t max, pmap_t pmap)
map->first_free = &map->header;
map->hint = &map->header;
map->timestamp = 0;
+ map->flags = 0;
lockinit(&map->lock, "thrd_sleep", 0, 0);
}
@@ -3405,6 +3406,10 @@ Retry:
}
}
+ if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE)) {
+
+ }
+
done:
if (use_read_lock)
vm_map_unlock_read(map);
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 5061ffb..93c6a39 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -110,6 +110,7 @@ RB_PROTOTYPE(vm_map_rb_tree, vm_map_entry,
rb_entry, rb_vm_map_compare);
* vm_map_entry_t an entry in an address map.
*/
+typedef u_int vm_flags_t;
typedef u_int vm_eflags_t;
/*
@@ -224,12 +225,18 @@ struct vm_map {
vm_map_entry_t hint; /* hint for quick lookups */
unsigned int timestamp; /* Version number */
vm_map_entry_t first_free; /* First free space hint */
+ vm_flags_t flags; /* flags for this vm_map */
struct pmap *pmap; /* Physical map */
#define min_offset header.start
#define max_offset header.end
};
/*
+ * vm_flags_t values
+ */
+#define MAP_WIREFUTURE 0x01 /* wire all future pages */
+
+/*
* Registered upcall
*/
struct upcall;
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index a5beeb2..f41d4c6 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1026,29 +1026,76 @@ sys_mlock(struct mlock_args *uap)
}
/*
- * mlockall_args(int how)
- *
- * Dummy routine, doesn't actually do anything.
+ * mlockall(int how)
*
* No requirements
*/
int
sys_mlockall(struct mlockall_args *uap)
{
- return (ENOSYS);
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ vm_map_t map = &p->p_vmspace->vm_map;
+ int how = uap->how;
+ int rc = KERN_SUCCESS;
+
+ vm_map_lock(map);
+
+ if (how & MCL_FUTURE)
+ map->flags |= MAP_WIREFUTURE;
+
+ if (how & MCL_CURRENT) {
+ rc = ENOSYS;
+ }
+
+ vm_map_unlock(map);
+
+ return (rc == KERN_SUCCESS) ? 0 : rc;
}
/*
- * munlockall_args(void)
+ * munlockall(void)
*
- * Dummy routine, doesn't actually do anything.
+ * Unwire all user-wired map entries, cancel MCL_FUTURE from mlockall
*
* No requirements
*/
int
sys_munlockall(struct munlockall_args *uap)
{
- return (ENOSYS);
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ vm_map_t map = &p->p_vmspace->vm_map;
+ vm_map_entry_t entry;
+ int rc = KERN_SUCCESS;
+
+ vm_map_lock(map);
+
+ /* Clear MAP_WIREFUTURE to cancel mlockall(MCL_FUTURE) */
+ map->flags &= ~MAP_WIREFUTURE;
+
+ for (entry = map->header.next;
+ entry != &map->header;
+ entry = entry->next) {
+ if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
+ continue;
+
+ /* XXX: Deal with MAP_ENTRY_IN_TRANSITION here? */
+
+ KASSERT(entry->wired_count > 0,
+ ("wired_count was 0 with USER_WIRED set! %p", entry));
+
+ /* Drop wired count, if it hits zero, unwire the entry */
+ entry->eflags &= ~MAP_ENTRY_USER_WIRED;
+ entry->wired_count--;
+ if (entry->wired_count == 0)
+ vm_fault_unwire(map, entry);
+ }
+
+ map->timestamp++;
+ vm_map_unlock(map);
+
+ return (rc);
}
/*
@@ -1111,6 +1158,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
vm_size_t size, vm_prot_t prot,
int rv = KERN_SUCCESS;
off_t objsize;
int docow;
+ int vflags;
if (size == 0)
return (0);
@@ -1315,6 +1363,16 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
vm_size_t size, vm_prot_t prot,
}
/*
+ * If process has marked all future mappings to be wired, do so
+ */
+ vm_map_lock_read(map);
+ vflags = map->flags;
+ vm_map_unlock_read(map);
+
+ if ((rv == KERN_SUCCESS) && (vflags & MAP_WIREFUTURE))
+ vm_map_unwire(map, *addr, *addr + size, FALSE);
+
+ /*
* Set the access time on the vnode
*/
if (vp != NULL)
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index 428c10f..2cc4638 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -75,6 +75,7 @@ sys_obreak(struct obreak_args *uap)
vm_offset_t new, old, base;
int rv;
int error;
+ int vflags;
error = 0;
lwkt_gettoken(&vm_token);
@@ -125,6 +126,14 @@ sys_obreak(struct obreak_args *uap)
goto done;
}
vm->vm_dsize += btoc(diff);
+
+ vm_map_lock_read(&vm->vm_map);
+ vflags = vm->vm_map.flags;
+ vm_map_unlock_read(&vm->vm_map);
+
+ if (vflags & MAP_WIREFUTURE)
+ vm_map_unwire(&vm->vm_map, old, new, FALSE);
+
} else if (new < old) {
rv = vm_map_remove(&vm->vm_map, new, old);
if (rv != KERN_SUCCESS) {
bash-3.2$ reset
Erase is backspace.
bash-3.2$ reset
Erase is backspace.
bash-3.2$ cat ~/mlockall.patch2
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 49f3a65..95253f1 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -486,6 +486,7 @@ vm_map_init(struct vm_map *map, vm_offset_t min,
vm_offset_t max, pmap_t pmap)
map->first_free = &map->header;
map->hint = &map->header;
map->timestamp = 0;
+ map->flags = 0;
lockinit(&map->lock, "thrd_sleep", 0, 0);
}
@@ -3405,6 +3406,10 @@ Retry:
}
}
+ if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE)) {
+
+ }
+
done:
if (use_read_lock)
vm_map_unlock_read(map);
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 5061ffb..93c6a39 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -110,6 +110,7 @@ RB_PROTOTYPE(vm_map_rb_tree, vm_map_entry,
rb_entry, rb_vm_map_compare);
* vm_map_entry_t an entry in an address map.
*/
+typedef u_int vm_flags_t;
typedef u_int vm_eflags_t;
/*
@@ -224,12 +225,18 @@ struct vm_map {
vm_map_entry_t hint; /* hint for quick lookups */
unsigned int timestamp; /* Version number */
vm_map_entry_t first_free; /* First free space hint */
+ vm_flags_t flags; /* flags for this vm_map */
struct pmap *pmap; /* Physical map */
#define min_offset header.start
#define max_offset header.end
};
/*
+ * vm_flags_t values
+ */
+#define MAP_WIREFUTURE 0x01 /* wire all future pages */
+
+/*
* Registered upcall
*/
struct upcall;
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index a5beeb2..f41d4c6 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1026,29 +1026,76 @@ sys_mlock(struct mlock_args *uap)
}
/*
- * mlockall_args(int how)
- *
- * Dummy routine, doesn't actually do anything.
+ * mlockall(int how)
*
* No requirements
*/
int
sys_mlockall(struct mlockall_args *uap)
{
- return (ENOSYS);
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ vm_map_t map = &p->p_vmspace->vm_map;
+ int how = uap->how;
+ int rc = KERN_SUCCESS;
+
+ vm_map_lock(map);
+
+ if (how & MCL_FUTURE)
+ map->flags |= MAP_WIREFUTURE;
+
+ if (how & MCL_CURRENT) {
+ rc = ENOSYS;
+ }
+
+ vm_map_unlock(map);
+
+ return (rc == KERN_SUCCESS) ? 0 : rc;
}
/*
- * munlockall_args(void)
+ * munlockall(void)
*
- * Dummy routine, doesn't actually do anything.
+ * Unwire all user-wired map entries, cancel MCL_FUTURE from mlockall
*
* No requirements
*/
int
sys_munlockall(struct munlockall_args *uap)
{
- return (ENOSYS);
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ vm_map_t map = &p->p_vmspace->vm_map;
+ vm_map_entry_t entry;
+ int rc = KERN_SUCCESS;
+
+ vm_map_lock(map);
+
+ /* Clear MAP_WIREFUTURE to cancel mlockall(MCL_FUTURE) */
+ map->flags &= ~MAP_WIREFUTURE;
+
+ for (entry = map->header.next;
+ entry != &map->header;
+ entry = entry->next) {
+ if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
+ continue;
+
+ /* XXX: Deal with MAP_ENTRY_IN_TRANSITION here? */
+
+ KASSERT(entry->wired_count > 0,
+ ("wired_count was 0 with USER_WIRED set! %p", entry));
+
+ /* Drop wired count, if it hits zero, unwire the entry */
+ entry->eflags &= ~MAP_ENTRY_USER_WIRED;
+ entry->wired_count--;
+ if (entry->wired_count == 0)
+ vm_fault_unwire(map, entry);
+ }
+
+ map->timestamp++;
+ vm_map_unlock(map);
+
+ return (rc);
}
/*
@@ -1111,6 +1158,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
vm_size_t size, vm_prot_t prot,
int rv = KERN_SUCCESS;
off_t objsize;
int docow;
+ int vflags;
if (size == 0)
return (0);
@@ -1315,6 +1363,16 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
vm_size_t size, vm_prot_t prot,
}
/*
+ * If process has marked all future mappings to be wired, do so
+ */
+ vm_map_lock_read(map);
+ vflags = map->flags;
+ vm_map_unlock_read(map);
+
+ if ((rv == KERN_SUCCESS) && (vflags & MAP_WIREFUTURE))
+ vm_map_unwire(map, *addr, *addr + size, FALSE);
+
+ /*
* Set the access time on the vnode
*/
if (vp != NULL)
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index 428c10f..2cc4638 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -75,6 +75,7 @@ sys_obreak(struct obreak_args *uap)
vm_offset_t new, old, base;
int rv;
int error;
+ int vflags;
error = 0;
lwkt_gettoken(&vm_token);
@@ -125,6 +126,14 @@ sys_obreak(struct obreak_args *uap)
goto done;
}
vm->vm_dsize += btoc(diff);
+
+ vm_map_lock_read(&vm->vm_map);
+ vflags = vm->vm_map.flags;
+ vm_map_unlock_read(&vm->vm_map);
+
+ if (vflags & MAP_WIREFUTURE)
+ vm_map_unwire(&vm->vm_map, old, new, FALSE);
+
} else if (new < old) {
rv = vm_map_remove(&vm->vm_map, new, old);
if (rv != KERN_SUCCESS) {
Thanks!
-- vs
More information about the Kernel
mailing list