summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/Kconfig.debug36
-rw-r--r--arch/x86/entry/vdso/vdso32/vclock_gettime.c2
-rw-r--r--arch/x86/include/asm/page_64_types.h3
-rw-r--r--arch/x86/include/asm/page_types.h3
-rw-r--r--arch/x86/include/asm/pgtable.h25
-rw-r--r--arch/x86/include/asm/pgtable_types.h40
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/dump_pagetables.c81
-rw-r--r--arch/x86/mm/gup.c18
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/pageattr.c79
12 files changed, 219 insertions, 74 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8c0d3266173..3e0baf726eef 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -65,10 +65,14 @@ config EARLY_PRINTK_EFI
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized.
+config X86_PTDUMP_CORE
+ def_bool n
+
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
+ select X86_PTDUMP_CORE
---help---
Say Y here if you want to show the kernel pagetable layout in a
debugfs file. This information is only useful for kernel developers
@@ -79,7 +83,8 @@ config X86_PTDUMP
config EFI_PGT_DUMP
bool "Dump the EFI pagetable"
- depends on EFI && X86_PTDUMP
+ depends on EFI
+ select X86_PTDUMP_CORE
---help---
Enable this if you want to dump the EFI page table before
enabling virtual mode. This can be used to debug miscellaneous
@@ -105,6 +110,35 @@ config DEBUG_RODATA_TEST
feature as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
+config DEBUG_WX
+ bool "Warn on W+X mappings at boot"
+ depends on DEBUG_RODATA
+ default y
+ select X86_PTDUMP_CORE
+ ---help---
+ Generate a warning if any W+X mappings are found at boot.
+
+ This is useful for discovering cases where the kernel is leaving
+ W+X mappings after applying NX, as such mappings are a security risk.
+
+ Look for a message in dmesg output like this:
+
+ x86/mm: Checked W+X mappings: passed, no W+X pages found.
+
+ or like this, if the check failed:
+
+ x86/mm: Checked W+X mappings: FAILED, <N> W+X pages found.
+
+ Note that even if the check fails, your kernel is possibly
+ still fine, as W+X mappings are not a security hole in
+ themselves, what they do is that they make the exploitation
+ of other unfixed kernel bugs easier.
+
+ There is no runtime or memory usage effect of this option
+ once the kernel has booted up - it's a one time check.
+
+ If in doubt, say "Y".
+
config DEBUG_SET_MODULE_RONX
bool "Set loadable kernel module data as NX and text as RO"
depends on MODULES
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 175cc72c0f68..87a86e017f0e 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -14,11 +14,13 @@
*/
#undef CONFIG_64BIT
#undef CONFIG_X86_64
+#undef CONFIG_PGTABLE_LEVELS
#undef CONFIG_ILLEGAL_POINTER_VALUE
#undef CONFIG_SPARSEMEM_VMEMMAP
#undef CONFIG_NR_CPUS
#define CONFIG_X86_32 1
+#define CONFIG_PGTABLE_LEVELS 2
#define CONFIG_PAGE_OFFSET 0
#define CONFIG_ILLEGAL_POINTER_VALUE 0
#define CONFIG_NR_CPUS 1
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 4edd53b79a81..4928cf0d5af0 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -26,9 +26,6 @@
#define MCE_STACK 4
#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
-#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
-#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
-
/*
* Set __PAGE_OFFSET to the most negative possible address +
* PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index c7c712f2648b..c5b7fb2774d0 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -20,6 +20,9 @@
#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
+#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 867da5bbb4a3..c0b41f111a9a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -19,6 +19,13 @@
#include <asm/x86_init.h>
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_checkwx(void);
+
+#ifdef CONFIG_DEBUG_WX
+#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
+#else
+#define debug_checkwx() do { } while (0)
+#endif
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -142,12 +149,12 @@ static inline unsigned long pte_pfn(pte_t pte)
static inline unsigned long pmd_pfn(pmd_t pmd)
{
- return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+ return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}
static inline unsigned long pud_pfn(pud_t pud)
{
- return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+ return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
@@ -379,7 +386,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
return __pgprot(preservebits | addbits);
}
-#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
+#define pte_pgprot(x) __pgprot(pte_flags(x))
+#define pmd_pgprot(x) __pgprot(pmd_flags(x))
+#define pud_pgprot(x) __pgprot(pud_flags(x))
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
@@ -502,14 +511,15 @@ static inline int pmd_none(pmd_t pmd)
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
- return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK);
+ return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd));
}
/*
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT)
+#define pmd_page(pmd) \
+ pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
/*
* the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -570,14 +580,15 @@ static inline int pud_present(pud_t pud)
static inline unsigned long pud_page_vaddr(pud_t pud)
{
- return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK);
+ return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
}
/*
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
+#define pud_page(pud) \
+ pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
/* Find an entry in the second-level page table.. */
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 13f310bfc09a..dd5b0aa9dd2f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -209,10 +209,10 @@ enum page_cache_mode {
#include <linux/types.h>
-/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
+/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */
#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
-/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
+/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */
#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
@@ -276,14 +276,46 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
}
#endif
+static inline pudval_t pud_pfn_mask(pud_t pud)
+{
+ if (native_pud_val(pud) & _PAGE_PSE)
+ return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+ else
+ return PTE_PFN_MASK;
+}
+
+static inline pudval_t pud_flags_mask(pud_t pud)
+{
+ if (native_pud_val(pud) & _PAGE_PSE)
+ return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK);
+ else
+ return ~PTE_PFN_MASK;
+}
+
static inline pudval_t pud_flags(pud_t pud)
{
- return native_pud_val(pud) & PTE_FLAGS_MASK;
+ return native_pud_val(pud) & pud_flags_mask(pud);
+}
+
+static inline pmdval_t pmd_pfn_mask(pmd_t pmd)
+{
+ if (native_pmd_val(pmd) & _PAGE_PSE)
+ return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+ else
+ return PTE_PFN_MASK;
+}
+
+static inline pmdval_t pmd_flags_mask(pmd_t pmd)
+{
+ if (native_pmd_val(pmd) & _PAGE_PSE)
+ return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK);
+ else
+ return ~PTE_PFN_MASK;
}
static inline pmdval_t pmd_flags(pmd_t pmd)
{
- return native_pmd_val(pmd) & PTE_FLAGS_MASK;
+ return native_pmd_val(pmd) & pmd_flags_mask(pmd);
}
static inline pte_t native_make_pte(pteval_t val)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index a482d105172b..65c47fda26fc 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_SMP) += tlb.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
+obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index f0cedf3395af..1bf417e9cc13 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -32,6 +32,8 @@ struct pg_state {
const struct addr_marker *marker;
unsigned long lines;
bool to_dmesg;
+ bool check_wx;
+ unsigned long wx_pages;
};
struct addr_marker {
@@ -155,7 +157,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
pt_dump_cont_printf(m, dmsg, " ");
if ((level == 4 && pr & _PAGE_PAT) ||
((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
- pt_dump_cont_printf(m, dmsg, "pat ");
+ pt_dump_cont_printf(m, dmsg, "PAT ");
else
pt_dump_cont_printf(m, dmsg, " ");
if (pr & _PAGE_GLOBAL)
@@ -198,8 +200,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
- prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
- cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
+ prot = pgprot_val(new_prot);
+ cur = pgprot_val(st->current_prot);
if (!st->level) {
/* First entry */
@@ -214,6 +216,16 @@ static void note_page(struct seq_file *m, struct pg_state *st,
const char *unit = units;
unsigned long delta;
int width = sizeof(unsigned long) * 2;
+ pgprotval_t pr = pgprot_val(st->current_prot);
+
+ if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
+ WARN_ONCE(1,
+ "x86/mm: Found insecure W+X mapping at address %p/%pS\n",
+ (void *)st->start_address,
+ (void *)st->start_address);
+ st->wx_pages += (st->current_address -
+ st->start_address) / PAGE_SIZE;
+ }
/*
* Now print the actual finished series
@@ -269,13 +281,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
{
int i;
pte_t *start;
+ pgprotval_t prot;
start = (pte_t *) pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- pgprot_t prot = pte_pgprot(*start);
-
+ prot = pte_flags(*start);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
- note_page(m, st, prot, 4);
+ note_page(m, st, __pgprot(prot), 4);
start++;
}
}
@@ -287,18 +299,19 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
{
int i;
pmd_t *start;
+ pgprotval_t prot;
start = (pmd_t *) pud_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
- pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
-
- if (pmd_large(*start) || !pmd_present(*start))
+ if (pmd_large(*start) || !pmd_present(*start)) {
+ prot = pmd_flags(*start);
note_page(m, st, __pgprot(prot), 3);
- else
+ } else {
walk_pte_level(m, st, *start,
P + i * PMD_LEVEL_MULT);
+ }
} else
note_page(m, st, __pgprot(0), 3);
start++;
@@ -318,19 +331,20 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
{
int i;
pud_t *start;
+ pgprotval_t prot;
start = (pud_t *) pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!pud_none(*start)) {
- pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
-
- if (pud_large(*start) || !pud_present(*start))
+ if (pud_large(*start) || !pud_present(*start)) {
+ prot = pud_flags(*start);
note_page(m, st, __pgprot(prot), 2);
- else
+ } else {
walk_pmd_level(m, st, *start,
P + i * PUD_LEVEL_MULT);
+ }
} else
note_page(m, st, __pgprot(0), 2);
@@ -344,13 +358,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
-void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
+static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
+ bool checkwx)
{
#ifdef CONFIG_X86_64
pgd_t *start = (pgd_t *) &init_level4_pgt;
#else
pgd_t *start = swapper_pg_dir;
#endif
+ pgprotval_t prot;
int i;
struct pg_state st = {};
@@ -359,16 +375,20 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
st.to_dmesg = true;
}
+ st.check_wx = checkwx;
+ if (checkwx)
+ st.wx_pages = 0;
+
for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
if (!pgd_none(*start)) {
- pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
-
- if (pgd_large(*start) || !pgd_present(*start))
+ if (pgd_large(*start) || !pgd_present(*start)) {
+ prot = pgd_flags(*start);
note_page(m, &st, __pgprot(prot), 1);
- else
+ } else {
walk_pud_level(m, &st, *start,
i * PGD_LEVEL_MULT);
+ }
} else
note_page(m, &st, __pgprot(0), 1);
@@ -378,8 +398,26 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
/* Flush out the last page */
st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
note_page(m, &st, __pgprot(0), 0);
+ if (!checkwx)
+ return;
+ if (st.wx_pages)
+ pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
+ st.wx_pages);
+ else
+ pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
}
+void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
+{
+ ptdump_walk_pgd_level_core(m, pgd, false);
+}
+
+void ptdump_walk_pgd_level_checkwx(void)
+{
+ ptdump_walk_pgd_level_core(NULL, NULL, true);
+}
+
+#ifdef CONFIG_X86_PTDUMP
static int ptdump_show(struct seq_file *m, void *v)
{
ptdump_walk_pgd_level(m, NULL);
@@ -397,10 +435,13 @@ static const struct file_operations ptdump_fops = {
.llseek = seq_lseek,
.release = single_release,
};
+#endif
static int pt_dump_init(void)
{
+#ifdef CONFIG_X86_PTDUMP
struct dentry *pe;
+#endif
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
@@ -412,10 +453,12 @@ static int pt_dump_init(void)
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif
+#ifdef CONFIG_X86_PTDUMP
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
&ptdump_fops);
if (!pe)
return -ENOMEM;
+#endif
return 0;
}
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 81bf3d2af3eb..ae9a37bf1371 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -118,21 +118,20 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask;
- pte_t pte = *(pte_t *)&pmd;
struct page *head, *page;
int refs;
mask = _PAGE_PRESENT|_PAGE_USER;
if (write)
mask |= _PAGE_RW;
- if ((pte_flags(pte) & mask) != mask)
+ if ((pmd_flags(pmd) & mask) != mask)
return 0;
/* hugepages are never "special" */
- VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
+ VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
refs = 0;
- head = pte_page(pte);
+ head = pmd_page(pmd);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -195,21 +194,20 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask;
- pte_t pte = *(pte_t *)&pud;
struct page *head, *page;
int refs;
mask = _PAGE_PRESENT|_PAGE_USER;
if (write)
mask |= _PAGE_RW;
- if ((pte_flags(pte) & mask) != mask)
+ if ((pud_flags(pud) & mask) != mask)
return 0;
/* hugepages are never "special" */
- VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
+ VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
refs = 0;
- head = pte_page(pte);
+ head = pud_page(pud);
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7562f42914b4..cb4ef3de61f9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -957,6 +957,8 @@ void mark_rodata_ro(void)
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
#endif
mark_nxdata_nx();
+ if (__supported_pte_mask & _PAGE_NX)
+ debug_checkwx();
}
#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df48430c279b..5ed62eff31bd 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1150,6 +1150,8 @@ void mark_rodata_ro(void)
free_init_pages("unused kernel",
(unsigned long) __va(__pa_symbol(rodata_end)),
(unsigned long) __va(__pa_symbol(_sdata)));
+
+ debug_checkwx();
}
#endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 050a092b8d9a..a3137a4feed1 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -414,18 +414,28 @@ pmd_t *lookup_pmd_address(unsigned long address)
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
unsigned long virt_addr = (unsigned long)__virt_addr;
- phys_addr_t phys_addr;
- unsigned long offset;
+ unsigned long phys_addr, offset;
enum pg_level level;
- unsigned long pmask;
pte_t *pte;
pte = lookup_address(virt_addr, &level);
BUG_ON(!pte);
- pmask = page_level_mask(level);
- offset = virt_addr & ~pmask;
- phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
- return (phys_addr | offset);
+
+ switch (level) {
+ case PG_LEVEL_1G:
+ phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+ offset = virt_addr & ~PUD_PAGE_MASK;
+ break;
+ case PG_LEVEL_2M:
+ phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+ offset = virt_addr & ~PMD_PAGE_MASK;
+ break;
+ default:
+ phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+ offset = virt_addr & ~PAGE_MASK;
+ }
+
+ return (phys_addr_t)(phys_addr | offset);
}
EXPORT_SYMBOL_GPL(slow_virt_to_phys);
@@ -458,7 +468,7 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
+ unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
@@ -478,17 +488,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
switch (level) {
case PG_LEVEL_2M:
-#ifdef CONFIG_X86_64
+ old_prot = pmd_pgprot(*(pmd_t *)kpte);
+ old_pfn = pmd_pfn(*(pmd_t *)kpte);
+ break;
case PG_LEVEL_1G:
-#endif
- psize = page_level_size(level);
- pmask = page_level_mask(level);
+ old_prot = pud_pgprot(*(pud_t *)kpte);
+ old_pfn = pud_pfn(*(pud_t *)kpte);
break;
default:
do_split = -EINVAL;
goto out_unlock;
}
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+
/*
* Calculate the number of pages, which fit into this large
* page starting at address:
@@ -504,7 +518,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* up accordingly.
*/
old_pte = *kpte;
- old_prot = req_prot = pgprot_large_2_4k(pte_pgprot(old_pte));
+ req_prot = pgprot_large_2_4k(old_prot);
pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
@@ -530,10 +544,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
req_prot = canon_pgprot(req_prot);
/*
- * old_pte points to the large page base address. So we need
+ * old_pfn points to the large page base pfn. So we need
* to add the offset of the virtual address:
*/
- pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
+ pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
cpa->pfn = pfn;
new_prot = static_protections(req_prot, address, pfn);
@@ -544,7 +558,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* the pages in the range we try to preserve:
*/
addr = address & pmask;
- pfn = pte_pfn(old_pte);
+ pfn = old_pfn;
for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
@@ -574,7 +588,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* The address is aligned and the number of pages
* covers the full page.
*/
- new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
+ new_pte = pfn_pte(old_pfn, new_prot);
__set_pmd_pte(kpte, address, new_pte);
cpa->flags |= CPA_FLUSHTLB;
do_split = 0;
@@ -591,7 +605,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
struct page *base)
{
pte_t *pbase = (pte_t *)page_address(base);
- unsigned long pfn, pfninc = 1;
+ unsigned long ref_pfn, pfn, pfninc = 1;
unsigned int i, level;
pte_t *tmp;
pgprot_t ref_prot;
@@ -608,26 +622,33 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
}
paravirt_alloc_pte(&init_mm, page_to_pfn(base));
- ref_prot = pte_pgprot(pte_clrhuge(*kpte));
- /* promote PAT bit to correct position */
- if (level == PG_LEVEL_2M)
+ switch (level) {
+ case PG_LEVEL_2M:
+ ref_prot = pmd_pgprot(*(pmd_t *)kpte);
+ /* clear PSE and promote PAT bit to correct position */
ref_prot = pgprot_large_2_4k(ref_prot);
+ ref_pfn = pmd_pfn(*(pmd_t *)kpte);
+ break;
-#ifdef CONFIG_X86_64
- if (level == PG_LEVEL_1G) {
+ case PG_LEVEL_1G:
+ ref_prot = pud_pgprot(*(pud_t *)kpte);
+ ref_pfn = pud_pfn(*(pud_t *)kpte);
pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+
/*
- * Set the PSE flags only if the PRESENT flag is set
+ * Clear the PSE flags if the PRESENT flag is not set
* otherwise pmd_present/pmd_huge will return true
* even on a non present pmd.
*/
- if (pgprot_val(ref_prot) & _PAGE_PRESENT)
- pgprot_val(ref_prot) |= _PAGE_PSE;
- else
+ if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
pgprot_val(ref_prot) &= ~_PAGE_PSE;
+ break;
+
+ default:
+ spin_unlock(&pgd_lock);
+ return 1;
}
-#endif
/*
* Set the GLOBAL flags only if the PRESENT flag is set
@@ -643,7 +664,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
/*
* Get the target pfn from the original entry:
*/
- pfn = pte_pfn(*kpte);
+ pfn = ref_pfn;
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));