mlockall / munlockall patch

Venkatesh Srinivas me at endeavour.zapto.org
Wed Nov 24 05:49:10 PST 2010


Hi,

This patch is the start of mlockall/munlockall support; it adds a
field to each vm_map, flags, to support mlockall(MCL_FUTURE) {from
FreeBSD} and modifies mmap() and brk() to test for that flag and wire
in any newly ill-gotten pages. It also implements munlockall(). This
code has been tested in a vkernel, seems to work okay.

Questions:
1) what permissions do we want to check for mlockall()?
2) currently, I read the vm_map flags under the per-map lock. This is
probably overkill for mmap and brk; should I read the value directly
instead?
3) in munlockall(), I've marked a section 'XXX', where it might be
possible to hit an in-transition map entry (entry->eflags ==
MAP_ENTRY_IN_TRANSITION). I don't understand the places in the vm where
that flag is tested for and the map lock released around it — I didn't
see any place where the flag was set and the per-map lock then released,
so perhaps I'm missing something?
4) are automatic stack growth pages supposed to be affected by MCL_FUTURE?
5) are pages from the 43bsd compat code supposed to be affected by MCL_FUTURE?

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 49f3a65..95253f1 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -486,6 +486,7 @@ vm_map_init(struct vm_map *map, vm_offset_t min,
vm_offset_t                                              max, pmap_t
pmap)
        map->first_free = &map->header;
        map->hint = &map->header;
        map->timestamp = 0;
+       map->flags = 0;
        lockinit(&map->lock, "thrd_sleep", 0, 0);
 }

@@ -3405,6 +3406,10 @@ Retry:
                }
        }

+       if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE)) {
+
+       }
+
 done:
        if (use_read_lock)
                vm_map_unlock_read(map);
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 5061ffb..93c6a39 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -110,6 +110,7 @@ RB_PROTOTYPE(vm_map_rb_tree, vm_map_entry,
rb_entry, rb_vm_m
ap_compare);
  *     vm_map_entry_t          an entry in an address map.
  */

+typedef u_int vm_flags_t;
 typedef u_int vm_eflags_t;

 /*
@@ -224,12 +225,18 @@ struct vm_map {
        vm_map_entry_t hint;            /* hint for quick lookups */
        unsigned int timestamp;         /* Version number */
        vm_map_entry_t first_free;      /* First free space hint */
+       vm_flags_t flags;               /* flags for this vm_map */
        struct pmap *pmap;              /* Physical map */
 #define        min_offset              header.start
 #define max_offset             header.end
 };

 /*
+ * vm_flags_t values
+ */
+#define MAP_WIREFUTURE         0x01    /* wire all future pages */
+
+/*
  * Registered upcall
  */
 struct upcall;
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index a5beeb2..f41d4c6 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1026,29 +1026,76 @@ sys_mlock(struct mlock_args *uap)
 }

 /*
- * mlockall_args(int how)
- *
- * Dummy routine, doesn't actually do anything.
+ * mlockall(int how)
  *
  * No requirements
  */
 int
 sys_mlockall(struct mlockall_args *uap)
 {
-       return (ENOSYS);
+       struct thread *td = curthread;
+       struct proc *p = td->td_proc;
+       vm_map_t map = &p->p_vmspace->vm_map;
+       int how = uap->how;
+       int rc = KERN_SUCCESS;
+
+       vm_map_lock(map);
+
+       if (how & MCL_FUTURE)
+               map->flags |= MAP_WIREFUTURE;
+
+       if (how & MCL_CURRENT) {
+               rc = ENOSYS;
+       }
+
+       vm_map_unlock(map);
+
+       return (rc == KERN_SUCCESS) ? 0 : rc;
 }

 /*
- * munlockall_args(void)
+ * munlockall(void)
  *
- * Dummy routine, doesn't actually do anything.
+ *     Unwire all user-wired map entries, cancel MCL_FUTURE from mlockall
  *
  * No requirements
  */
 int
 sys_munlockall(struct munlockall_args *uap)
 {
-       return (ENOSYS);
+       struct thread *td = curthread;
+       struct proc *p = td->td_proc;
+       vm_map_t map = &p->p_vmspace->vm_map;
+       vm_map_entry_t entry;
+       int rc = KERN_SUCCESS;
+
+       vm_map_lock(map);
+
+       /* Clear MAP_WIREFUTURE to cancel mlockall(MCL_FUTURE) */
+       map->flags &= ~MAP_WIREFUTURE;
+
+       for (entry = map->header.next;
+            entry != &map->header;
+            entry = entry->next) {
+               if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
+                       continue;
+
+               /* XXX: Deal with MAP_ENTRY_IN_TRANSITION here? */
+
+               KASSERT(entry->wired_count > 0,
+                       ("wired_count was 0 with USER_WIRED set! %p", entry));
+
+               /* Drop wired count, if it hits zero, unwire the entry */
+               entry->eflags &= ~MAP_ENTRY_USER_WIRED;
+               entry->wired_count--;
+               if (entry->wired_count == 0)
+                       vm_fault_unwire(map, entry);
+       }
+
+       map->timestamp++;
+       vm_map_unlock(map);
+
+       return (rc);
 }

 /*
@@ -1111,6 +1158,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
vm_size_t size, v                                             m_prot_t
prot,
        int rv = KERN_SUCCESS;
        off_t objsize;
        int docow;
+       int vflags;

        if (size == 0)
                return (0);
@@ -1315,6 +1363,16 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
vm_size_t size,                                              vm_prot_t
prot,
        }

        /*
+        * If process has marked all future mappings to be wired, do so
+        */
+       vm_map_lock_read(map);
+       vflags = map->flags;
+       vm_map_unlock_read(map);
+
+       if ((rv == KERN_SUCCESS) && (vflags & MAP_WIREFUTURE))
+               vm_map_unwire(map, *addr, *addr + size, FALSE);
+
+       /*
         * Set the access time on the vnode
         */
        if (vp != NULL)
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index 428c10f..2cc4638 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -75,6 +75,7 @@ sys_obreak(struct obreak_args *uap)
        vm_offset_t new, old, base;
        int rv;
        int error;
+       int vflags;

        error = 0;
        lwkt_gettoken(&vm_token);
@@ -125,6 +126,14 @@ sys_obreak(struct obreak_args *uap)
                        goto done;
                }
                vm->vm_dsize += btoc(diff);
+
+               vm_map_lock_read(&vm->vm_map);
+               vflags = vm->vm_map.flags;
+               vm_map_unlock_read(&vm->vm_map);
+
+               if (vflags & MAP_WIREFUTURE)
+                       vm_map_unwire(&vm->vm_map, old, new, FALSE);
+
        } else if (new < old) {
                rv = vm_map_remove(&vm->vm_map, new, old);
                if (rv != KERN_SUCCESS) {
[Note: some stray terminal output was accidentally captured here. What
follows is a second, unwrapped copy of the same patch, taken verbatim
from ~/mlockall.patch2; use this copy for applying.]
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 49f3a65..95253f1 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -486,6 +486,7 @@ vm_map_init(struct vm_map *map, vm_offset_t min,
vm_offset_t max, pmap_t pmap)
        map->first_free = &map->header;
        map->hint = &map->header;
        map->timestamp = 0;
+       map->flags = 0;
        lockinit(&map->lock, "thrd_sleep", 0, 0);
 }

@@ -3405,6 +3406,10 @@ Retry:
                }
        }

+       if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE)) {
+
+       }
+
 done:
        if (use_read_lock)
                vm_map_unlock_read(map);
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 5061ffb..93c6a39 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -110,6 +110,7 @@ RB_PROTOTYPE(vm_map_rb_tree, vm_map_entry,
rb_entry, rb_vm_map_compare);
  *     vm_map_entry_t          an entry in an address map.
  */

+typedef u_int vm_flags_t;
 typedef u_int vm_eflags_t;

 /*
@@ -224,12 +225,18 @@ struct vm_map {
        vm_map_entry_t hint;            /* hint for quick lookups */
        unsigned int timestamp;         /* Version number */
        vm_map_entry_t first_free;      /* First free space hint */
+       vm_flags_t flags;               /* flags for this vm_map */
        struct pmap *pmap;              /* Physical map */
 #define        min_offset              header.start
 #define max_offset             header.end
 };

 /*
+ * vm_flags_t values
+ */
+#define MAP_WIREFUTURE         0x01    /* wire all future pages */
+
+/*
  * Registered upcall
  */
 struct upcall;
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index a5beeb2..f41d4c6 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1026,29 +1026,76 @@ sys_mlock(struct mlock_args *uap)
 }

 /*
- * mlockall_args(int how)
- *
- * Dummy routine, doesn't actually do anything.
+ * mlockall(int how)
  *
  * No requirements
  */
 int
 sys_mlockall(struct mlockall_args *uap)
 {
-       return (ENOSYS);
+       struct thread *td = curthread;
+       struct proc *p = td->td_proc;
+       vm_map_t map = &p->p_vmspace->vm_map;
+       int how = uap->how;
+       int rc = KERN_SUCCESS;
+
+       vm_map_lock(map);
+
+       if (how & MCL_FUTURE)
+               map->flags |= MAP_WIREFUTURE;
+
+       if (how & MCL_CURRENT) {
+               rc = ENOSYS;
+       }
+
+       vm_map_unlock(map);
+
+       return (rc == KERN_SUCCESS) ? 0 : rc;
 }

 /*
- * munlockall_args(void)
+ * munlockall(void)
  *
- * Dummy routine, doesn't actually do anything.
+ *     Unwire all user-wired map entries, cancel MCL_FUTURE from mlockall
  *
  * No requirements
  */
 int
 sys_munlockall(struct munlockall_args *uap)
 {
-       return (ENOSYS);
+       struct thread *td = curthread;
+       struct proc *p = td->td_proc;
+       vm_map_t map = &p->p_vmspace->vm_map;
+       vm_map_entry_t entry;
+       int rc = KERN_SUCCESS;
+
+       vm_map_lock(map);
+
+       /* Clear MAP_WIREFUTURE to cancel mlockall(MCL_FUTURE) */
+       map->flags &= ~MAP_WIREFUTURE;
+
+       for (entry = map->header.next;
+            entry != &map->header;
+            entry = entry->next) {
+               if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
+                       continue;
+
+               /* XXX: Deal with MAP_ENTRY_IN_TRANSITION here? */
+
+               KASSERT(entry->wired_count > 0,
+                       ("wired_count was 0 with USER_WIRED set! %p", entry));
+
+               /* Drop wired count, if it hits zero, unwire the entry */
+               entry->eflags &= ~MAP_ENTRY_USER_WIRED;
+               entry->wired_count--;
+               if (entry->wired_count == 0)
+                       vm_fault_unwire(map, entry);
+       }
+
+       map->timestamp++;
+       vm_map_unlock(map);
+
+       return (rc);
 }

 /*
@@ -1111,6 +1158,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
vm_size_t size, vm_prot_t prot,
        int rv = KERN_SUCCESS;
        off_t objsize;
        int docow;
+       int vflags;

        if (size == 0)
                return (0);
@@ -1315,6 +1363,16 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
vm_size_t size, vm_prot_t prot,
        }

        /*
+        * If process has marked all future mappings to be wired, do so
+        */
+       vm_map_lock_read(map);
+       vflags = map->flags;
+       vm_map_unlock_read(map);
+
+       if ((rv == KERN_SUCCESS) && (vflags & MAP_WIREFUTURE))
+               vm_map_unwire(map, *addr, *addr + size, FALSE);
+
+       /*
         * Set the access time on the vnode
         */
        if (vp != NULL)
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index 428c10f..2cc4638 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -75,6 +75,7 @@ sys_obreak(struct obreak_args *uap)
        vm_offset_t new, old, base;
        int rv;
        int error;
+       int vflags;

        error = 0;
        lwkt_gettoken(&vm_token);
@@ -125,6 +126,14 @@ sys_obreak(struct obreak_args *uap)
                        goto done;
                }
                vm->vm_dsize += btoc(diff);
+
+               vm_map_lock_read(&vm->vm_map);
+               vflags = vm->vm_map.flags;
+               vm_map_unlock_read(&vm->vm_map);
+
+               if (vflags & MAP_WIREFUTURE)
+                       vm_map_unwire(&vm->vm_map, old, new, FALSE);
+
        } else if (new < old) {
                rv = vm_map_remove(&vm->vm_map, new, old);
                if (rv != KERN_SUCCESS) {

Thanks!
-- vs





More information about the Kernel mailing list