PAT support
Aggelos Economopoulos
aoiko at cc.ece.ntua.gr
Mon Apr 19 10:14:49 PDT 2010
Here's a patch for Page Attribute Table (PAT) support that I ported from
FreeBSD quite a while back. I'm sending it to submit@ in case someone gets
interested and deals with the CPU identification part (probably by bringing
in the FreeBSD code for that too) so that we can merge it. PAT will get used
more and more in new code, and having it will significantly help porting.
As you'll notice in pmap_init_pat(), the CPU errata check is currently
stubbed out:
+#ifdef notyet
+ if (cpu_vendor_id != CPU_VENDOR_INTEL ||
+ (I386_CPU_FAMILY(cpu_id) == 6 && I386_CPU_MODEL(cpu_id) >=
0xe)) {
+#else
+ if (!0) {
+#endif
so we need at least the I386_CPU_{FAMILY,MODEL} macros (found in FreeBSD's
i386/include/specialreg.h), and having the companion code would be nice
too. (ISTR it was straightforward but not trivial; I'm not sure I remember
correctly now. It looks trivial enough :/)
I *have* tried using the PMAP in mxge(4) so I don't think you'll get any
panics ;)
Aggelos
diff --git a/sys/cpu/i386/include/cpufunc.h b/sys/cpu/i386/include/cpufunc.h
index 9b7e80e..9c57446 100644
--- a/sys/cpu/i386/include/cpufunc.h
+++ b/sys/cpu/i386/include/cpufunc.h
@@ -396,6 +396,14 @@ cpu_invltlb(void)
#endif
+#ifndef _CPU_WBINVL_DEFINED
+static __inline void
+cpu_wbinvl(void)
+{
+ __asm __volatile("wbinvd");
+}
+#endif
+
static __inline void
cpu_nop(void)
{
diff --git a/sys/cpu/i386/include/pmap.h b/sys/cpu/i386/include/pmap.h
index f7f0def..ccaef4e 100644
--- a/sys/cpu/i386/include/pmap.h
+++ b/sys/cpu/i386/include/pmap.h
@@ -62,10 +62,12 @@
#define PG_A 0x020 /* A Accessed */
#define PG_M 0x040 /* D Dirty */
#define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */
+#define PG_PTE_PAT 0x080 /* PAT PAT index */
#define PG_G 0x100 /* G Global */
#define PG_AVAIL1 0x200 /* / Available for system */
#define PG_AVAIL2 0x400 /* < programmers use */
#define PG_AVAIL3 0x800 /* \ */
+#define PG_PDE_PAT 0x1000 /* PAT PAT index */
/* Our various interpretations of the above */
diff --git a/sys/cpu/i386/include/specialreg.h b/sys/cpu/i386/include/specialreg.h
index e0207ec..36a9811 100644
--- a/sys/cpu/i386/include/specialreg.h
+++ b/sys/cpu/i386/include/specialreg.h
@@ -159,6 +159,7 @@
#define MSR_MTRR64kBase 0x250
#define MSR_MTRR16kBase 0x258
#define MSR_MTRR4kBase 0x268
+#define MSR_PAT 0x277
#define MSR_MTRRdefType 0x2ff
#define MSR_MC0_CTL 0x400
#define MSR_MC0_STATUS 0x401
@@ -184,6 +185,17 @@
#define MSR_THERM_INTERRUPT 0x19b
#define MSR_THERM_STATUS 0x19c
+/*
+ * PAT modes.
+ */
+#define PAT_UNCACHEABLE 0x00
+#define PAT_WRITE_COMBINING 0x01
+#define PAT_WRITE_THROUGH 0x04
+#define PAT_WRITE_PROTECTED 0x05
+#define PAT_WRITE_BACK 0x06
+#define PAT_UNCACHED 0x07
+#define PAT_VALUE(i, m) ((long long)(m) << (8 * (i)))
+#define PAT_MASK(i) PAT_VALUE(i, 0xff)
/*
* Constants related to MTRRs
diff --git a/sys/platform/pc32/i386/mp_machdep.c b/sys/platform/pc32/i386/mp_machdep.c
index eba146f..5e82c3c 100644
--- a/sys/platform/pc32/i386/mp_machdep.c
+++ b/sys/platform/pc32/i386/mp_machdep.c
@@ -624,6 +624,8 @@ init_secondary(void)
load_cr0(cr0);
pmap_set_opt(); /* PSE/4MB pages, etc */
+ pmap_init_pat(); /* Page Attribute Table */
+
/* set up CPU registers and state */
cpu_setregs();
diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c
index 0504439..bf04044 100644
--- a/sys/platform/pc32/i386/pmap.c
+++ b/sys/platform/pc32/i386/pmap.c
@@ -158,6 +158,7 @@ vm_offset_t KvaSize; /* max size of kernel virtual address space */
static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */
static int pgeflag; /* PG_G or-in */
static int pseflag; /* PG_PS or-in */
+static int pat_works; /* Is page attribute table sane? */
static vm_object_t kptobj;
@@ -216,6 +217,7 @@ static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va);
static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
+static int pmap_cache_bits(int, boolean_t);
static unsigned pdir4mb;
@@ -295,6 +297,55 @@ pmap_pte_quick(pmap_t pmap, vm_offset_t va)
return (0);
}
+/*
+ * Setup the PAT MSR.
+ */
+void
+pmap_init_pat(void)
+{
+ uint64_t pat_msr;
+
+ /* Bail if this CPU doesn't implement PAT. */
+ if (!(cpu_feature & CPUID_PAT))
+ return;
+
+#ifdef notyet
+ if (cpu_vendor_id != CPU_VENDOR_INTEL ||
+ (I386_CPU_FAMILY(cpu_id) == 6 && I386_CPU_MODEL(cpu_id) >= 0xe)) {
+#else
+ if (!0) {
+#endif
+ /*
+ * Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
+ * Program 4 and 5 as WP and WC.
+ * Leave 6 and 7 as UC and UC-.
+ */
+ pat_msr = rdmsr(MSR_PAT);
+ pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
+ pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
+ PAT_VALUE(5, PAT_WRITE_COMBINING);
+ pat_works = 1;
+ } else {
+ /*
+ * Due to some Intel errata, we can only safely use the lower 4
+ * PAT entries. Thus, just replace PAT Index 2 with WC instead
+ * of UC-.
+ *
+ * Intel Pentium III Processor Specification Update
+ * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
+ * or Mode C Paging)
+ *
+ * Intel Pentium IV Processor Specification Update
+ * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
+ */
+ pat_msr = rdmsr(MSR_PAT);
+ pat_msr &= ~PAT_MASK(2);
+ pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
+ pat_works = 0;
+ }
+ wrmsr(MSR_PAT, pat_msr);
+}
+
/*
* Bootstrap the system enough to run with virtual memory.
@@ -446,6 +497,7 @@ pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr)
}
#endif
+ pmap_init_pat();
/*
* We need to finish setting up the globaldata page for the BSP.
* locore has already populated the page table for the mdglobaldata
@@ -554,6 +606,89 @@ pmap_init2(void)
* Low level helper routines.....
***************************************************/
+/*
+ * Determine the appropriate bits to set in a PTE or PDE for a specified
+ * caching mode.
+ */
+static int
+pmap_cache_bits(int mode, boolean_t is_pde)
+{
+ int pat_flag, pat_index, cache_bits;
+
+ /* The PAT bit is different for PTE's and PDE's. */
+ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
+
+ /* If we don't support PAT, map extended modes to older ones. */
+ if (!(cpu_feature & CPUID_PAT)) {
+ switch (mode) {
+ case PAT_UNCACHEABLE:
+ case PAT_WRITE_THROUGH:
+ case PAT_WRITE_BACK:
+ break;
+ case PAT_UNCACHED:
+ case PAT_WRITE_COMBINING:
+ case PAT_WRITE_PROTECTED:
+ mode = PAT_UNCACHEABLE;
+ break;
+ }
+ }
+
+ /* Map the caching mode to a PAT index. */
+ if (pat_works) {
+ switch (mode) {
+ case PAT_UNCACHEABLE:
+ pat_index = 3;
+ break;
+ case PAT_WRITE_THROUGH:
+ pat_index = 1;
+ break;
+ case PAT_WRITE_BACK:
+ pat_index = 0;
+ break;
+ case PAT_UNCACHED:
+ pat_index = 2;
+ break;
+ case PAT_WRITE_COMBINING:
+ pat_index = 5;
+ break;
+ case PAT_WRITE_PROTECTED:
+ pat_index = 4;
+ break;
+ default:
+ panic("Unknown caching mode %d\n", mode);
+ }
+ } else {
+ switch (mode) {
+ case PAT_UNCACHED:
+ case PAT_UNCACHEABLE:
+ case PAT_WRITE_PROTECTED:
+ pat_index = 3;
+ break;
+ case PAT_WRITE_THROUGH:
+ pat_index = 1;
+ break;
+ case PAT_WRITE_BACK:
+ pat_index = 0;
+ break;
+ case PAT_WRITE_COMBINING:
+ pat_index = 2;
+ break;
+ default:
+ panic("Unknown caching mode %d\n", mode);
+ }
+ }
+
+ /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
+ cache_bits = 0;
+ if (pat_index & 0x4)
+ cache_bits |= pat_flag;
+ if (pat_index & 0x2)
+ cache_bits |= PG_NC_PCD;
+ if (pat_index & 0x1)
+ cache_bits |= PG_NC_PWT;
+ return (cache_bits);
+}
+
#if defined(PMAP_DIAGNOSTIC)
/*
@@ -3210,6 +3345,70 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size)
kmem_free(&kernel_map, base, size);
}
+int
+pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
+{
+ vm_offset_t base, offset, tmpva;
+ pt_entry_t *pte;
+ u_int opte, npte;
+ pd_entry_t *pde;
+ pmap_inval_info info;
+
+ base = trunc_page(va);
+ offset = va & PAGE_MASK;
+ size = roundup(offset + size, PAGE_SIZE);
+
+ /*
+ * Only supported on kernel virtual addresses
+ */
+ if (base < KvaStart)
+ return (EINVAL);
+
+ /* 4MB pages and pages that aren't mapped aren't supported. */
+ for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
+ pde = pmap_pde(&kernel_pmap, tmpva);
+ if (*pde & PG_PS)
+ return (EINVAL);
+ if (*pde == 0)
+ return (EINVAL);
+ pte = vtopte(tmpva);
+ if (*pte == 0)
+ return (EINVAL);
+ }
+
+ pmap_inval_init(&info);
+ /*
+ * Ok, all the pages exist and are 4k, so run through them updating
+ * their cache mode.
+ */
+ for (tmpva = base; size > 0; ) {
+ pte = vtopte(tmpva);
+
+ /*
+ * The cache mode bits are all in the low 32-bits of the
+ * PTE, so we can just spin on updating the low 32-bits.
+ */
+ do {
+ opte = *(u_int *)pte;
+ npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
+ npte |= pmap_cache_bits(mode, 0);
+ } while (npte != opte &&
+ !atomic_cmpset_int((u_int *)pte, opte, npte));
+ pmap_inval_add(&info, &kernel_pmap, tmpva);
+ tmpva += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+
+ /*
+ * Flush CPU caches to make sure any data isn't cached that shouldn't
+ * be, etc.
+ */
+ pmap_inval_cache_add(&info, &kernel_pmap, -1);
+ pmap_inval_flush(&info);
+ return (0);
+}
+
+
/*
* perform the pmap work for mincore
*/
diff --git a/sys/platform/pc32/i386/pmap_inval.c b/sys/platform/pc32/i386/pmap_inval.c
index b6b68f3..35999ba 100644
--- a/sys/platform/pc32/i386/pmap_inval.c
+++ b/sys/platform/pc32/i386/pmap_inval.c
@@ -67,7 +67,7 @@
#ifdef SMP
static void
-_cpu_invltlb(void *dummy)
+_cpu_invltlb(void *dummy __unused)
{
cpu_invltlb();
}
@@ -78,6 +78,12 @@ _cpu_invl1pg(void *data)
cpu_invlpg(data);
}
+static void
+_cpu_wbinvl(void *dummy __unused)
+{
+ cpu_wbinvl();
+}
+
#endif
/*
@@ -89,6 +95,7 @@ pmap_inval_init(pmap_inval_info_t info)
info->pir_flags = 0;
}
+#ifdef SMP
/*
* Add a (pmap, va) pair to the invalidation list and protect access
* as appropriate.
@@ -96,7 +103,6 @@ pmap_inval_init(pmap_inval_info_t info)
void
pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va)
{
-#ifdef SMP
if ((info->pir_flags & PIRF_CPUSYNC) == 0) {
info->pir_flags |= PIRF_CPUSYNC;
info->pir_cpusync.cs_run_func = NULL;
@@ -106,46 +112,86 @@ pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va)
} else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) {
lwkt_cpusync_add(pmap->pm_active, &info->pir_cpusync);
}
-#else
- if (pmap->pm_active == 0)
- return;
-#endif
if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) {
if (va == (vm_offset_t)-1) {
info->pir_flags |= PIRF_INVLTLB;
-#ifdef SMP
info->pir_cpusync.cs_fin2_func = _cpu_invltlb;
-#endif
} else {
info->pir_flags |= PIRF_INVL1PG;
info->pir_cpusync.cs_data = (void *)va;
-#ifdef SMP
info->pir_cpusync.cs_fin2_func = _cpu_invl1pg;
-#endif
}
} else {
info->pir_flags |= PIRF_INVLTLB;
-#ifdef SMP
info->pir_cpusync.cs_fin2_func = _cpu_invltlb;
-#endif
}
}
+void
+pmap_inval_cache_add(pmap_inval_info_t info, pmap_t pmap,
+ vm_offset_t va __unused)
+{
+ if ((info->pir_flags & PIRF_CPUSYNC) == 0) {
+ info->pir_flags |= PIRF_CPUSYNC;
+ info->pir_cpusync.cs_run_func = NULL;
+ info->pir_cpusync.cs_fin1_func = NULL;
+ info->pir_cpusync.cs_fin2_func = NULL;
+ lwkt_cpusync_start(pmap->pm_active, &info->pir_cpusync);
+ } else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) {
+ lwkt_cpusync_add(pmap->pm_active, &info->pir_cpusync);
+ }
+ info->pir_flags |= PIRF_WBINVL;
+ info->pir_cpusync.cs_fin2_func = _cpu_wbinvl;
+}
+
/*
* Synchronize changes with target cpus.
*/
void
pmap_inval_flush(pmap_inval_info_t info)
{
-#ifdef SMP
if (info->pir_flags & PIRF_CPUSYNC)
lwkt_cpusync_finish(&info->pir_cpusync);
-#else
+ info->pir_flags = 0;
+}
+
+#else /* !SMP */
+
+void
+pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va)
+{
+ if (pmap->pm_active == 0)
+ return;
+ if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) {
+ if (va == (vm_offset_t)-1) {
+ info->pir_flags |= PIRF_INVLTLB;
+ } else {
+ info->pir_flags |= PIRF_INVL1PG;
+ info->pir_cpusync.cs_data = (void *)va;
+ }
+ } else {
+ info->pir_flags |= PIRF_INVLTLB;
+ }
+}
+
+void
+pmap_inval_cache_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va)
+{
+ if (pmap->pm_active == 0)
+ return;
+ info->pir_flags |= PIRF_WBINVL;
+}
+
+void
+pmap_inval_flush(pmap_inval_info_t info)
+{
if (info->pir_flags & PIRF_INVLTLB)
cpu_invltlb();
else if (info->pir_flags & PIRF_INVL1PG)
cpu_invlpg(info->pir_cpusync.cs_data);
-#endif
+ if (info->pir_flags & PIRF_WBINVL)
+ cpu_wbinvl();
info->pir_flags = 0;
}
+#endif /* SMP */
diff --git a/sys/platform/pc32/include/pmap.h b/sys/platform/pc32/include/pmap.h
index 9d7dda8..8efc52e 100644
--- a/sys/platform/pc32/include/pmap.h
+++ b/sys/platform/pc32/include/pmap.h
@@ -249,6 +249,8 @@ int pmap_get_pgeflag(void);
#ifdef SMP
void pmap_set_opt (void);
#endif
+void pmap_init_pat(void);
+int pmap_change_attr(vm_offset_t, vm_size_t, int);
#endif /* _KERNEL */
diff --git a/sys/platform/pc32/include/pmap_inval.h b/sys/platform/pc32/include/pmap_inval.h
index e8cd668..23e149f 100644
--- a/sys/platform/pc32/include/pmap_inval.h
+++ b/sys/platform/pc32/include/pmap_inval.h
@@ -51,6 +51,7 @@ typedef pmap_inval_info *pmap_inval_info_t;
#define PIRF_INVLTLB 0x0001 /* request invalidation of whole table */
#define PIRF_INVL1PG 0x0002 /* else request invalidation of one page */
#define PIRF_CPUSYNC 0x0004 /* cpusync is currently active */
+#define PIRF_WBINVL 0x0008 /* request cache invalidation */
#ifdef _KERNEL
@@ -60,6 +61,7 @@ typedef pmap_inval_info *pmap_inval_info_t;
void pmap_inval_init(pmap_inval_info_t);
void pmap_inval_add(pmap_inval_info_t, pmap_t, vm_offset_t);
+void pmap_inval_cache_add(pmap_inval_info_t, pmap_t, vm_offset_t);
void pmap_inval_flush(pmap_inval_info_t);
#endif
More information about the Submit mailing list