diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-10 08:59:07 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-10 08:59:07 -0800 |
commit | baeedc7158da5b0f489d04125ba6adfba532a6f7 (patch) | |
tree | 1f42b5cd4a961ed10673578a28602dd13d09ab99 | |
parent | 8fe3ccaed080a7804bc459c42e0419253973be92 (diff) | |
parent | 90eceff1a375f6ffa78caf8654e787c0a8a591ef (diff) |
Merge branch 'prep-for-5level'
Merge 5-level page table prep from Kirill Shutemov:
"Here's relatively low-risk part of 5-level paging patchset. Merging it
now will make x86 5-level paging enabling in v4.12 easier.
The first patch is actually x86-specific: detect 5-level paging
support. It boils down to single define.
The rest of patchset converts Linux MMU abstraction from 4- to 5-level
paging.
Enabling of new abstraction in most cases requires adding single line
of code in arch-specific code. The rest is taken care by asm-generic/.
Changes to mm/ code are mostly mechanical: add support for new page
table level -- p4d_t -- where we deal with pud_t now.
v2:
- fix build on microblaze (Michal);
- comment for __ARCH_HAS_5LEVEL_HACK in kasan_populate_zero_shadow();
- acks from Michal"
* emailed patches from Kirill A Shutemov <kirill.shutemov@linux.intel.com>:
mm: introduce __p4d_alloc()
mm: convert generic code to 5-level paging
asm-generic: introduce <asm-generic/pgtable-nop4d.h>
arch, mm: convert all architectures to use 5level-fixup.h
asm-generic: introduce __ARCH_USE_5LEVEL_HACK
asm-generic: introduce 5level-fixup.h
x86/cpufeature: Add 5-level paging detection
64 files changed, 868 insertions, 147 deletions
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 317ff773e1ca..b18fcb606908 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -11,6 +11,7 @@ #define _ASM_ARC_HUGEPAGE_H #include <linux/types.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline pte_t pmd_pte(pmd_t pmd) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index e94ca72b974e..ee22d40afef4 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -37,6 +37,7 @@ #include <asm/page.h> #include <asm/mmu.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <linux/const.h> diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index a8d656d9aec7..1c462381c225 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -20,6 +20,7 @@ #else +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #include <asm/memory.h> #include <asm/pgtable-hwdef.h> diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 69b2fd41503c..345a072b5856 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define __pgprot(x) ((pgprot_t) { (x) } ) #if CONFIG_PGTABLE_LEVELS == 2 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #elif CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> +#elif CONFIG_PGTABLE_LEVELS == 4 +#include <asm-generic/5level-fixup.h> #endif #endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h index 425dd567b5b9..d5b1c63993ec 100644 --- a/arch/avr32/include/asm/pgtable-2level.h +++ b/arch/avr32/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H #define __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index 2a3210ba4c72..fa3a73004cc5 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _CRIS_PGTABLE_H #include <asm/page.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index a0513d463a1f..ab6e7e961b54 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -16,6 +16,7 @@ #ifndef _ASM_PGTABLE_H #define _ASM_PGTABLE_H +#include <asm-generic/5level-fixup.h> #include <asm/mem-layout.h> #include <asm/setup.h> #include <asm/processor.h> diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h index 8341db67821d..7d265d28ba5e 100644 --- a/arch/h8300/include/asm/pgtable.h +++ b/arch/h8300/include/asm/pgtable.h @@ -1,5 +1,6 @@ #ifndef _H8300_PGTABLE_H #define _H8300_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable.h> #define pgtable_cache_init() do { } while (0) diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 49eab8136ec3..24a9177fb897 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -26,6 +26,7 @@ */ #include <linux/swap.h> #include <asm/page.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* A handy thing to have if one has the RAM. Declared in head.S */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 384794e665fc..6cc22c8d8923 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr; #if CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #endif +#include <asm-generic/5level-fixup.h> #include <asm-generic/pgtable.h> #endif /* _ASM_IA64_PGTABLE_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index ffa3a3a2ecad..0c151e5af079 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _METAG_PGTABLE_H #include <asm/pgtable-bits.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index fd850879854d..d506bb0893f9 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t; # else /* CONFIG_MMU */ typedef struct { unsigned long ste[64]; } pmd_t; typedef struct { pmd_t pue[1]; } pud_t; -typedef struct { pud_t pge[1]; } pgd_t; +typedef struct { pud_t p4e[1]; } p4d_t; +typedef struct { p4d_t pge[1]; } pgd_t; # endif /* CONFIG_MMU */ # define pte_val(x) ((x).pte) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index d21f3da7bdb6..6f94bed571c4 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -16,6 +16,7 @@ #include <asm/cachectl.h> #include <asm/fixmap.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> extern int temp_tlb_entry; diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 514cbc0a6a67..130a2a6c1531 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -17,6 +17,7 @@ #include <asm/cachectl.h> #include <asm/fixmap.h> +#define __ARCH_USE_5LEVEL_HACK #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) #include <asm-generic/pgtable-nopmd.h> #else diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 3810a6f740fd..dfe730a5ede0 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -57,6 +57,7 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 298393c3cb42..db4f7d179220 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -22,6 +22,7 @@ #include <asm/tlbflush.h> #include <asm/pgtable-bits.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #define FIRST_USER_ADDRESS 0UL diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 3567aa7be555..ff97374ca069 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -25,6 +25,7 @@ #ifndef __ASM_OPENRISC_PGTABLE_H #define __ASM_OPENRISC_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 012223638815..26ed228d4dc6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/book3s/32/hash.h> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index ec1e731e6a2d..8f4d41936e5a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1,9 +1,12 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +#include <asm-generic/5level-fixup.h> + #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> #endif + /* * Common bits between hash and Radix page table */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ba9921bf202e..5134ade2e850 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index d0db98793dd8..9f4de0a1035e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -1,5 +1,8 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H + +#include <asm-generic/5level-fixup.h> + /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 55b28ef3409a..1facb584dd29 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7ed1972b1920..93e37b12e882 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,6 +24,7 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ +#include <asm-generic/5level-fixup.h> #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/page-flags.h> diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h index 0553e5cd5985..46ff8fd678a7 100644 --- a/arch/score/include/asm/pgtable.h +++ b/arch/score/include/asm/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_SCORE_PGTABLE_H #include <linux/const.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/fixmap.h> diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h index 19bd89db17e7..f75cf4387257 100644 --- a/arch/sh/include/asm/pgtable-2level.h +++ b/arch/sh/include/asm/pgtable-2level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_2LEVEL_H #define __ASM_SH_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 249a985d9648..9b1e776eca31 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_3LEVEL_H #define __ASM_SH_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> /* diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 56e49c8f770d..8a598528ec1f 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -12,6 +12,7 @@ * the SpitFire page tables. */ +#include <asm-generic/5level-fixup.h> #include <linux/compiler.h> #include <linux/const.h> #include <asm/types.h> diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index d26a42279036..5f8c615cb5e9 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; #define MAXMEM (_VMALLOC_START - PAGE_OFFSET) /* We have no pmd or pud since we are strictly a two-level page table */ +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline int pud_huge_page(pud_t pud) { return 0; } diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index e96cec52f6d8..96fe58b45118 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -59,6 +59,7 @@ #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> /* diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index cfbe59752469..179c0ea87a0c 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __UM_PGTABLE_2LEVEL_H #define __UM_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index bae8523a162f..c4d876dfb9ac 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -7,6 +7,7 @@ #ifndef __UM_PGTABLE_3LEVEL_H #define __UM_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index 818d0f5598e3..a4f2bef37e70 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -12,6 +12,7 @@ #ifndef __UNICORE_PGTABLE_H__ #define __UNICORE_PGTABLE_H__ +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/cpu-single.h> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4e7772387c6e..b04bb6dfed7f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -289,7 +289,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8b4de22d6429..62484333673d 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd) } #if CONFIG_PGTABLE_LEVELS > 3 +#include <asm-generic/5level-fixup.h> + typedef struct { pudval_t pud; } pud_t; static inline pud_t native_make_pud(pmdval_t val) @@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> static inline pudval_t native_pud_val(pud_t pud) @@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } #else +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline pmdval_t native_pmd_val(pmd_t pmd) diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 8aa0e0d9cbb2..30dd5b2e4ad5 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_PGTABLE_H #define _XTENSA_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/page.h> #include <asm/kmem_layout.h> diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 6fb773dbcd0c..93be82fc338a 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -219,15 +219,20 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, int write, unsigned long *paddr, int *pageshift) { pgd_t *pgdp; - pmd_t *pmdp; + p4d_t *p4dp; pud_t *pudp; + pmd_t *pmdp; pte_t pte; pgdp = pgd_offset(vma->vm_mm, vaddr); if (unlikely(pgd_none(*pgdp))) goto err; - pudp = pud_offset(pgdp, vaddr); + p4dp = p4d_offset(pgdp, vaddr); + if (unlikely(p4d_none(*p4dp))) + goto err; + + pudp = pud_offset(p4dp, vaddr); if (unlikely(pud_none(*pudp))) goto err; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 2bb1c72380f2..1d227b0fcf49 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -265,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, { struct mm_struct *mm = ctx->mm; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd, _pmd; pte_t *pte; @@ -275,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; pmd = pmd_offset(pud, address); diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 5bdab6bffd23..928fd66b1271 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -15,7 +15,6 @@ ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \ NULL: pmd_offset(pud, address)) -#define pud_alloc(mm, pgd, address) (pgd) #define pud_offset(pgd, start) (pgd) #define pud_none(pud) 0 #define pud_bad(pud) 0 @@ -35,4 +34,6 @@ #undef pud_addr_end #define pud_addr_end(addr, end) (end) +#include <asm-generic/5level-fixup.h> + #endif diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h new file mode 100644 index 000000000000..b5ca82dc4175 --- /dev/null +++ b/include/asm-generic/5level-fixup.h @@ -0,0 +1,41 @@ +#ifndef _5LEVEL_FIXUP_H +#define _5LEVEL_FIXUP_H + +#define __ARCH_HAS_5LEVEL_HACK +#define __PAGETABLE_P4D_FOLDED + +#define P4D_SHIFT PGDIR_SHIFT +#define P4D_SIZE PGDIR_SIZE +#define P4D_MASK PGDIR_MASK +#define PTRS_PER_P4D 1 + +#define p4d_t pgd_t + +#define pud_alloc(mm, p4d, address) \ + ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \ + NULL : pud_offset(p4d, address)) + +#define p4d_alloc(mm, pgd, address) (pgd) +#define p4d_offset(pgd, start) (pgd) +#define p4d_none(p4d) 0 +#define p4d_bad(p4d) 0 +#define p4d_present(p4d) 1 +#define p4d_ERROR(p4d) do { } while (0) +#define p4d_clear(p4d) pgd_clear(p4d) +#define p4d_val(p4d) pgd_val(p4d) +#define p4d_populate(mm, p4d, pud) pgd_populate(mm, p4d, pud) +#define p4d_page(p4d) pgd_page(p4d) +#define p4d_page_vaddr(p4d) pgd_page_vaddr(p4d) + +#define __p4d(x) __pgd(x) +#define set_p4d(p4dp, p4d) set_pgd(p4dp, p4d) + +#undef p4d_free_tlb +#define p4d_free_tlb(tlb, x, addr) do { } while (0) +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, addr) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h new file mode 100644 index 000000000000..752fb7511750 --- /dev/null +++ b/include/asm-generic/pgtable-nop4d-hack.h @@ -0,0 +1,62 @@ +#ifndef _PGTABLE_NOP4D_HACK_H +#define _PGTABLE_NOP4D_HACK_H + +#ifndef __ASSEMBLY__ +#include <asm-generic/5level-fixup.h> + +#define __PAGETABLE_PUD_FOLDED + +/* + * Having the pud type consist of a pgd gets the size right, and allows + * us to conceptually access the pgd entry that this pud is folded into + * without casting. + */ +typedef struct { pgd_t pgd; } pud_t; + +#define PUD_SHIFT PGDIR_SHIFT +#define PTRS_PER_PUD 1 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pud is never bad, and a pud always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) + +#define pgd_populate(mm, pgd, pud) do { } while (0) +/* + * (puds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +{ + return (pud_t *)pgd; +} + +#define pud_val(x) (pgd_val((x).pgd)) +#define __pud(x) ((pud_t) { __pgd(x) }) + +#define pgd_page(pgd) (pud_page((pud_t){ pgd })) +#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) + +/* + * allocating and freeing a pud is trivial: the 1-entry pud is + * inside the pgd, so has no extra memory associated with it. + */ +#define pud_alloc_one(mm, address) NULL +#define pud_free(mm, x) do { } while (0) +#define __pud_free_tlb(tlb, x, a) do { } while (0) + +#undef pud_addr_end +#define pud_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_HACK_H */ diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h new file mode 100644 index 000000000000..de364ecb8df6 --- /dev/null +++ b/include/asm-generic/pgtable-nop4d.h @@ -0,0 +1,56 @@ +#ifndef _PGTABLE_NOP4D_H +#define _PGTABLE_NOP4D_H + +#ifndef __ASSEMBLY__ + +#define __PAGETABLE_P4D_FOLDED + +typedef struct { pgd_t pgd; } p4d_t; + +#define P4D_SHIFT PGDIR_SHIFT +#define PTRS_PER_P4D 1 +#define P4D_SIZE (1UL << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the p4d is never bad, and a p4d always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define p4d_ERROR(p4d) (pgd_ERROR((p4d).pgd)) + +#define pgd_populate(mm, pgd, p4d) do { } while (0) +/* + * (p4ds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval }) + +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + return (p4d_t *)pgd; +} + +#define p4d_val(x) (pgd_val((x).pgd)) +#define __p4d(x) ((p4d_t) { __pgd(x) }) + +#define pgd_page(pgd) (p4d_page((p4d_t){ pgd })) +#define pgd_page_vaddr(pgd) (p4d_page_vaddr((p4d_t){ pgd })) + +/* + * allocating and freeing a p4d is trivial: the 1-entry p4d is + * inside the pgd, so has no extra memory associated with it. + */ +#define p4d_alloc_one(mm, address) NULL +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, a) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_H */ diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 810431d8351b..c2b9b96d6268 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -3,52 +3,57 @@ #ifndef __ASSEMBLY__ +#ifdef __ARCH_USE_5LEVEL_HACK +#include <asm-generic/pgtable-nop4d-hack.h> +#else +#include <asm-generic/pgtable-nop4d.h> + #define __PAGETABLE_PUD_FOLDED /* - * Having the pud type consist of a pgd gets the size right, and allows - * us to conceptually access the pgd entry that this pud is folded into + * Having the pud type consist of a p4d gets the size right, and allows + * us to conceptually access the p4d entry that this pud is folded into * without casting. */ -typedef struct { pgd_t pgd; } pud_t; +typedef struct { p4d_t p4d; } pud_t; -#define PUD_SHIFT PGDIR_SHIFT +#define PUD_SHIFT P4D_SHIFT #define PTRS_PER_PUD 1 #define PUD_SIZE (1UL << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) /* - * The "pgd_xxx()" functions here are trivial for a folded two-level + * The "p4d_xxx()" functions here are trivial for a folded two-level * setup: the pud is never bad, and a pud always exists (as it's folded - * into the pgd entry) + * into the p4d entry) */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -static inline void pgd_clear(pgd_t *pgd) { } -#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) +static inline int p4d_none(p4d_t p4d) { return 0; } +static inline int p4d_bad(p4d_t p4d) { return 0; } +static inline int p4d_present(p4d_t p4d) { return 1; } +static inline void p4d_clear(p4d_t *p4d) { } +#define pud_ERROR(pud) (p4d_ERROR((pud).p4d)) -#define pgd_populate(mm, pgd, pud) do { } while (0) +#define p4d_populate(mm, p4d, pud) do { } while (0) /* - * (puds are folded into pgds so this doesn't get actually called, + * (puds are folded into p4ds so this doesn't get actually called, * but the define is needed for a generic inline function.) */ -#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) +#define set_p4d(p4dptr, p4dval) set_pud((pud_t *)(p4dptr), (pud_t) { p4dval }) -static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return (pud_t *)pgd; + return (pud_t *)p4d; } -#define pud_val(x) (pgd_val((x).pgd)) -#define __pud(x) ((pud_t) { __pgd(x) } ) +#define pud_val(x) (p4d_val((x).p4d)) +#define __pud(x) ((pud_t) { __p4d(x) }) -#define pgd_page(pgd) (pud_page((pud_t){ pgd })) -#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) +#define p4d_page(p4d) (pud_page((pud_t){ p4d })) +#define p4d_page_vaddr(p4d) (pud_page_vaddr((pud_t){ p4d })) /* * allocating and freeing a pud is trivial: the 1-entry pud is - * inside the pgd, so has no extra memory associated with it. + * inside the p4d, so has no extra memory associated with it. */ #define pud_alloc_one(mm, address) NULL #define pud_free(mm, x) do { } while (0) @@ -58,4 +63,5 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) #define pud_addr_end(addr, end) (end) #endif /* __ASSEMBLY__ */ +#endif /* !__ARCH_USE_5LEVEL_HACK */ #endif /* _PGTABLE_NOPUD_H */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f4ca23b158b3..1fad160f35de 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -10,9 +10,9 @@ #include <linux/bug.h> #include <linux/errno.h> -#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \ - CONFIG_PGTABLE_LEVELS -#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED +#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \ + defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS +#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED #endif /* @@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +#ifndef p4d_addr_end +#define p4d_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) +#endif + #ifndef pud_addr_end #define pud_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ @@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) * Do the tests inline, but report and clear the bad entry in mm/memory.c. */ void pgd_clear_bad(pgd_t *); +void p4d_clear_bad(p4d_t *); void pud_clear_bad(pud_t *); void pmd_clear_bad(pmd_t *); @@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd) return 0; } +static inline int p4d_none_or_clear_bad(p4d_t *p4d) +{ + if (p4d_none(*p4d)) + return 1; + if (unlikely(p4d_bad(*p4d))) { + p4d_clear_bad(p4d); + return 1; + } + return 0; +} + static inline int pud_none_or_clear_bad(pud_t *pud) { if (pud_none(*pud)) @@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd) #endif /* CONFIG_MMU */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#ifndef __PAGETABLE_P4D_FOLDED +int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot); +int p4d_clear_huge(p4d_t *p4d); +#else +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} +#endif /* !__PAGETABLE_P4D_FOLDED */ + int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { return 0; @@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { return 0; } +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} static inline int pud_clear_huge(pud_t *pud) { return 0; diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 4329bc6ef04b..8afa4335e5b2 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -270,6 +270,12 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, __pte_free_tlb(tlb, ptep, address); \ } while (0) +#define pmd_free_tlb(tlb, pmdp, address) \ + do { \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __pmd_free_tlb(tlb, pmdp, address); \ + } while (0) + #ifndef __ARCH_HAS_4LEVEL_HACK #define pud_free_tlb(tlb, pudp, address) \ do { \ @@ -278,11 +284,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } while (0) #endif -#define pmd_free_tlb(tlb, pmdp, address) \ +#ifndef __ARCH_HAS_5LEVEL_HACK +#define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - __pmd_free_tlb(tlb, pmdp, address); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __p4d_free_tlb(tlb, pudp, address); \ } while (0) +#endif #define tlb_migrate_finish(mm) do {} while (0) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 503099d8aada..b857fc8cc2ec 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); int pmd_huge(pmd_t pmd); -int pud_huge(pud_t pmd); +int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); @@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, #ifndef pgd_huge #define pgd_huge(x) 0 #endif +#ifndef p4d_huge +#define p4d_huge(x) 0 +#endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index ceb3fe78a0d3..1c823bef4c15 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD]; +extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; void kasan_populate_zero_shadow(const void *shadow_start, const void *shadow_end); diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d65dd72c0f4..5f01c88f0800 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, return ptep; } +#ifdef __PAGETABLE_P4D_FOLDED +static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return 0; +} +#else +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +#endif + #ifdef __PAGETABLE_PUD_FOLDED -static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, +static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return 0; } #else -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); #endif #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) @@ -1619,11 +1629,22 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); * Remove it when 4level-fixup.h has been removed. */ #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) -static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) + +#ifndef __ARCH_HAS_5LEVEL_HACK +static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? + NULL : p4d_offset(pgd, address); +} + +static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, + unsigned long address) { - return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? - NULL: pud_offset(pgd, address); + return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? + NULL : pud_offset(p4d, address); } +#endif /* !__ARCH_HAS_5LEVEL_HACK */ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { @@ -2385,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); +p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); +pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); diff --git a/lib/ioremap.c b/lib/ioremap.c index a3e14ce92a56..4bb30206b942 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -14,6 +14,7 @@ #include <asm/pgtable.h> #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +static int __read_mostly ioremap_p4d_capable; static int __read_mostly ioremap_pud_capable; static int __read_mostly ioremap_pmd_capable; static int __read_mostly ioremap_huge_disabled; @@ -35,6 +36,11 @@ void __init ioremap_huge_init(void) } } +static inline int ioremap_p4d_enabled(void) +{ + return ioremap_p4d_capable; +} + static inline int ioremap_pud_enabled(void) { return ioremap_pud_capable; @@ -46,6 +52,7 @@ static inline int ioremap_pmd_enabled(void) } #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int ioremap_p4d_enabled(void) { return 0; } static inline int ioremap_pud_enabled(void) { return 0; } static inline int ioremap_pmd_enabled(void) { return 0; } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ @@ -94,14 +101,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, return 0; } -static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, +static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; phys_addr -= addr; - pud = pud_alloc(&init_mm, pgd, addr); + pud = pud_alloc(&init_mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -120,6 +127,32 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, return 0; } +static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + p4d_t *p4d; + unsigned long next; + + phys_addr -= addr; + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + + if (ioremap_p4d_enabled() && + ((next - addr) == P4D_SIZE) && + IS_ALIGNED(phys_addr + addr, P4D_SIZE)) { + if (p4d_set_huge(p4d, phys_addr + addr, prot)) + continue; + } + + if (ioremap_pud_range(p4d, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { @@ -135,7 +168,7 @@ int ioremap_page_range(unsigned long addr, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); + err = ioremap_p4d_range(pgd, addr, next, phys_addr+addr, prot); if (err) break; } while (pgd++, addr = next, addr != end); @@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma, unsigned int *page_mask) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; spinlock_t *ptl; @@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma, pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); - - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d)) + return no_page_table(vma, flags); + BUILD_BUG_ON(p4d_huge(*p4d)); + if (unlikely(p4d_bad(*p4d))) + return no_page_table(vma, flags); + pud = pud_offset(p4d, address); if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { @@ -325,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, struct page **page) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -338,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, else pgd = pgd_offset_gate(mm, address); BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + BUG_ON(p4d_none(*p4d)); + pud = pud_offset(p4d, address); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) @@ -1400,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, +static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(&pgd, addr); + pudp = pud_offset(&p4d, addr); do { pud_t pud = READ_ONCE(*pudp); @@ -1428,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + p4d_t *p4dp; + + p4dp = p4d_offset(&pgd, addr); + do { + p4d_t p4d = READ_ONCE(*p4dp); + + next = p4d_addr_end(addr, end); + if (p4d_none(p4d)) + return 0; + BUILD_BUG_ON(p4d_huge(p4d)); + if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { + if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, + P4D_SHIFT, next, write, pages, nr)) + return 0; + } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr)) + return 0; + } while (p4dp++, addr = next, addr != end); + + return 1; +} + /* * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * the regular GUP. It will only return non-negative values. @@ -1478,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, write, pages, &nr)) break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8f037e256c54..1ebc93e179f3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2048,6 +2048,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct page *page) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -2055,7 +2056,11 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, if (!pgd_present(*pgd)) return; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + return; + + pud = pud_offset(p4d, address); if (!pud_present(*pud)) return; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a7aa811b7d14..3d0aab9ee80d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4555,7 +4555,8 @@ out: int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); + p4d_t *p4d = p4d_offset(pgd, *addr); + pud_t *pud = pud_offset(p4d, *addr); BUG_ON(page_count(virt_to_page(ptep)) == 0); if (page_count(virt_to_page(ptep)) == 1) @@ -4586,11 +4587,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (pud) { if (sz == PUD_SIZE) { pte = (pte_t *)pud; @@ -4610,18 +4613,22 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; - pmd_t *pmd = NULL; + pmd_t *pmd; pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_huge(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } + if (!pgd_present(*pgd)) + return NULL; + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) + return NULL; + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); return (pte_t *) pmd; } diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 31238dad85fb..b96a5f773d88 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -30,6 +30,9 @@ */ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; +#if CONFIG_PGTABLE_LEVELS > 4 +p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss; +#endif #if CONFIG_PGTABLE_LEVELS > 3 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; #endif @@ -82,10 +85,10 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr, } while (pmd++, addr = next, addr != end); } -static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, +static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); unsigned long next; do { @@ -107,6 +110,23 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, } while (pud++, addr = next, addr != end); } +static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr, + unsigned long end) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + unsigned long next; + + do { + next = p4d_addr_end(addr, end); + + if (p4d_none(*p4d)) { + p4d_populate(&init_mm, p4d, + early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + } + zero_pud_populate(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + /** * kasan_populate_zero_shadow - populate shadow memory region with * kasan_zero_page @@ -125,6 +145,7 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, next = pgd_addr_end(addr, end); if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -135,9 +156,22 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, * 3,2 - level page tables where we don't have * puds,pmds, so pgd_populate(), pud_populate() * is noops. + * + * The ifndef is required to avoid build breakage. + * + * With 5level-fixup.h, pgd_populate() is not nop and + * we reference kasan_zero_p4d. It's not defined + * unless 5-level paging enabled. + * + * The ifndef can be dropped once all KASAN-enabled + * architectures will switch to pgtable-nop4d.h. */ - pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud)); - pud = pud_offset(pgd, addr); +#ifndef __ARCH_HAS_5LEVEL_HACK + pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_p4d)); +#endif + p4d = p4d_offset(pgd, addr); + p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud)); + pud = pud_offset(p4d, addr); pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd)); pmd = pmd_offset(pud, addr); pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); @@ -148,6 +182,6 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, pgd_populate(&init_mm, pgd, early_alloc(PAGE_SIZE, NUMA_NO_NODE)); } - zero_pud_populate(pgd, addr, next); + zero_p4d_populate(pgd, addr, next); } while (pgd++, addr = next, addr != end); } diff --git a/mm/memory.c b/mm/memory.c index a97a4cec2e1f..235ba51b2fbf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, mm_dec_nr_pmds(tlb->mm); } -static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start; start = addr; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, free_pmd_range(tlb, pud, addr, next, floor, ceiling); } while (pud++, addr = next, addr != end); + start &= P4D_MASK; + if (start < floor) + return; + if (ceiling) { + ceiling &= P4D_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + + pud = pud_offset(p4d, start); + p4d_clear(p4d); + pud_free_tlb(tlb, pud, start); +} + +static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + p4d_t *p4d; + unsigned long next; + unsigned long start; + + start = addr; + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + free_pud_range(tlb, p4d, addr, next, floor, ceiling); + } while (p4d++, addr = next, addr != end); + start &= PGDIR_MASK; if (start < floor) return; @@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, if (end - 1 > ceiling - 1) return; - pud = pud_offset(pgd, start); + p4d = p4d_offset(pgd, start); pgd_clear(pgd); - pud_free_tlb(tlb, pud, start); + p4d_free_tlb(tlb, p4d, start); } /* @@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - free_pud_range(tlb, pgd, addr, next, floor, ceiling); + free_p4d_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } @@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, pte_t pte, struct page *page) { pgd_t *pgd = pgd_offset(vma->vm_mm, addr); - pud_t *pud = pud_offset(pgd, addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); pmd_t *pmd = pmd_offset(pud, addr); struct address_space *mapping; pgoff_t index; @@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src } static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { pud_t *src_pud, *dst_pud; unsigned long next; - dst_pud = pud_alloc(dst_mm, dst_pgd, addr); + dst_pud = pud_alloc(dst_mm, dst_p4d, addr); if (!dst_pud) return -ENOMEM; - src_pud = pud_offset(src_pgd, addr); + src_pud = pud_offset(src_p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { @@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src return 0; } +static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + p4d_t *src_p4d, *dst_p4d; + unsigned long next; + + dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr); + if (!dst_p4d) + return -ENOMEM; + src_p4d = p4d_offset(src_pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(src_p4d)) + continue; + if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d, + vma, addr, next)) + return -ENOMEM; + } while (dst_p4d++, src_p4d++, addr = next, addr != end); + return 0; +} + int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, struct vm_area_struct *vma) { @@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; - if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, + if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd, vma, addr, next))) { ret = -ENOMEM; break; @@ -1267,14 +1323,14 @@ next: } static inline unsigned long zap_pud_range(struct mmu_gather *tlb, - struct vm_area_struct *vma, pgd_t *pgd, + struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, struct zap_details *details) { pud_t *pud; unsigned long next; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*pud) || pud_devmap(*pud)) { @@ -1295,6 +1351,25 @@ next: return addr; } +static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + struct zap_details *details) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + next = zap_pud_range(tlb, vma, p4d, addr, next, details); + } while (p4d++, addr = next, addr != end); + + return addr; +} + void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, @@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - next = zap_pud_range(tlb, vma, pgd, addr, next, details); + next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); } @@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes); pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl) { - pgd_t *pgd = pgd_offset(mm, addr); - pud_t *pud = pud_alloc(mm, pgd, addr); - if (pud) { - pmd_t *pmd = pmd_alloc(mm, pud, addr); - if (pmd) { - VM_BUG_ON(pmd_trans_huge(*pmd)); - return pte_alloc_map_lock(mm, pmd, addr, ptl); - } - } - return NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return NULL; + + VM_BUG_ON(pmd_trans_huge(*pmd)); + return pte_alloc_map_lock(mm, pmd, addr, ptl); } /* @@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, return 0; } -static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, +static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { @@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long next; pfn -= addr >> PAGE_SHIFT; - pud = pud_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, return 0; } +static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + unsigned long pfn, pgprot_t prot) +{ + p4d_t *p4d; + unsigned long next; + + pfn -= addr >> PAGE_SHIFT; + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + if (remap_pud_range(mm, p4d, addr, next, + pfn + (addr >> PAGE_SHIFT), prot)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + /** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to @@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, flush_cache_range(vma, addr, end); do { next = pgd_addr_end(addr, end); - err = remap_pud_range(mm, pgd, addr, next, + err = remap_p4d_range(mm, pgd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) break; @@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, return err; } -static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, +static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) { @@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long next; int err; - pud = pud_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, return err; } +static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) +{ + p4d_t *p4d; + unsigned long next; + int err; + + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + err = apply_to_pud_range(mm, p4d, addr, next, fn, data); + if (err) + break; + } while (p4d++, addr = next, addr != end); + return err; +} + /* * Scan a region of virtual memory, filling in page tables as necessary * and calling a provided function on each leaf page table. @@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_pud_range(mm, pgd, addr, next, fn, data); + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); if (err) break; } while (pgd++, addr = next, addr != end); @@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, }; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; + p4d_t *p4d; int ret; pgd = pgd_offset(mm, address); + p4d = p4d_alloc(mm, pgd, address); + if (!p4d) + return VM_FAULT_OOM; - vmf.pud = pud_alloc(mm, pgd, address); + vmf.pud = pud_alloc(mm, p4d, address); if (!vmf.pud) return VM_FAULT_OOM; if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) { @@ -3779,12 +3906,35 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(handle_mm_fault); +#ifndef __PAGETABLE_P4D_FOLDED +/* + * Allocate p4d page table. + * We've already handled the fast-path in-line. + */ +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + p4d_t *new = p4d_alloc_one(mm, address); + if (!new) + return -ENOMEM; + + smp_wmb(); /* See comment in __pte_alloc */ + + spin_lock(&mm->page_table_lock); + if (pgd_present(*pgd)) /* Another has populated it */ + p4d_free(mm, new); + else + pgd_populate(mm, pgd, new); + spin_unlock(&mm->page_table_lock); + return 0; +} +#endif /* __PAGETABLE_P4D_FOLDED */ + #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. * We've already handled the fast-path in-line. */ -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { pud_t *new = pud_alloc_one(mm, address); if (!new) @@ -3793,10 +3943,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&mm->page_table_lock); - if (pgd_present(*pgd)) /* Another has populated it */ +#ifndef __ARCH_HAS_5LEVEL_HACK + if (p4d_present(*p4d)) /* Another has populated it */ pud_free(mm, new); else - pgd_populate(mm, pgd, new); + p4d_populate(mm, p4d, new); +#else + if (pgd_present(*p4d)) /* Another has populated it */ + pud_free(mm, new); + else + pgd_populate(mm, p4d, new); +#endif /* __ARCH_HAS_5LEVEL_HACK */ spin_unlock(&mm->page_table_lock); return 0; } @@ -3839,6 +3996,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *ptep; @@ -3847,7 +4005,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) + goto out; + + pud = pud_offset(p4d, address); if (pud_none(*pud) || unlikely(pud_bad(*pud))) goto out; diff --git a/mm/mlock.c b/mm/mlock.c index 02f138244bf5..0dd9ca18e19e 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -380,6 +380,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, pte = get_locked_pte(vma->vm_mm, start, &ptl); /* Make sure we do not cross the page table boundary */ end = pgd_addr_end(start, end); + end = p4d_addr_end(start, end); end = pud_addr_end(start, end); end = pmd_addr_end(start, end); diff --git a/mm/mprotect.c b/mm/mprotect.c index 848e946b08e5..8edd0d576254 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -193,14 +193,14 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, } static inline unsigned long change_pud_range(struct vm_area_struct *vma, - pgd_t *pgd, unsigned long addr, unsigned long end, + p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) { pud_t *pud; unsigned long next; unsigned long pages = 0; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -212,6 +212,26 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, return pages; } +static inline unsigned long change_p4d_range(struct vm_area_struct *vma, + pgd_t *pgd, unsigned long addr, unsigned long end, + pgprot_t newprot, int dirty_accountable, int prot_numa) +{ + p4d_t *p4d; + unsigned long next; + unsigned long pages = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + pages += change_pud_range(vma, p4d, addr, next, newprot, + dirty_accountable, prot_numa); + } while (p4d++, addr = next, addr != end); + + return pages; +} + static unsigned long change_protection_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -230,7 +250,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - pages += change_pud_range(vma, pgd, addr, next, newprot, + pages += change_p4d_range(vma, pgd, addr, next, newprot, dirty_accountable, prot_numa); } while (pgd++, addr = next, addr != end); diff --git a/mm/mremap.c b/mm/mremap.c index 8233b0105c82..cd8a1b199ef9 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -32,6 +32,7 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -39,7 +40,11 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) if (pgd_none_or_clear_bad(pgd)) return NULL; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none_or_clear_bad(p4d)) + return NULL; + + pud = pud_offset(p4d, addr); if (pud_none_or_clear_bad(pud)) return NULL; @@ -54,11 +59,15 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); if (!pud) return NULL; diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index a23001a22c15..c4c9def8ffea 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -104,6 +104,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) struct mm_struct *mm = pvmw->vma->vm_mm; struct page *page = pvmw->page; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; /* The only possible pmd mapping has been handled on last iteration */ @@ -133,7 +134,10 @@ restart: pgd = pgd_offset(mm, pvmw->address); if (!pgd_present(*pgd)) return false; - pud = pud_offset(pgd, pvmw->address); + p4d = p4d_offset(pgd, pvmw->address); + if (!p4d_present(*p4d)) + return false; + pud = pud_offset(p4d, pvmw->address); if (!pud_present(*pud)) return false; pvmw->pmd = pmd_offset(pud, pvmw->address); diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 03761577ae86..60f7856e508f 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -69,14 +69,14 @@ again: return err; } -static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, +static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, struct mm_walk *walk) { pud_t *pud; unsigned long next; int err = 0; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { again: next = pud_addr_end(addr, end); @@ -113,6 +113,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, return err; } +static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + p4d_t *p4d; + unsigned long next; + int err = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) { + if (walk->pte_hole) + err = walk->pte_hole(addr, next, walk); + if (err) + break; + continue; + } + if (walk->pmd_entry || walk->pte_entry) + err = walk_pud_range(p4d, addr, next, walk); + if (err) + break; + } while (p4d++, addr = next, addr != end); + + return err; +} + static int walk_pgd_range(unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -131,7 +157,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end, continue; } if (walk->pmd_entry || walk->pte_entry) - err = walk_pud_range(pgd, addr, next, walk); + err = walk_p4d_range(pgd, addr, next, walk); if (err) break; } while (pgd++, addr = next, addr != end); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 4ed5908c65b0..c99d9512a45b 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -22,6 +22,12 @@ void pgd_clear_bad(pgd_t *pgd) pgd_clear(pgd); } +void p4d_clear_bad(p4d_t *p4d) +{ + p4d_ERROR(*p4d); + p4d_clear(p4d); +} + void pud_clear_bad(pud_t *pud) { pud_ERROR(*pud); diff --git a/mm/rmap.c b/mm/rmap.c index 250635dc47b7..49ed681ccc7b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -684,6 +684,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd = NULL; pmd_t pmde; @@ -692,7 +693,11 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 574c67b663fe..a56c3989f773 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -196,9 +196,9 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) return pmd; } -pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) +pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); if (pud_none(*pud)) { void *p = vmemmap_alloc_block(PAGE_SIZE, node); if (!p) @@ -208,6 +208,18 @@ pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) return pud; } +p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, node); + if (!p) + return NULL; + p4d_populate(&init_mm, p4d, p); + } + return p4d; +} + pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) { pgd_t *pgd = pgd_offset_k(addr); @@ -225,6 +237,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start, { unsigned long addr = start; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -233,7 +246,10 @@ int __meminit vmemmap_populate_basepages(unsigned long start, pgd = vmemmap_pgd_populate(addr, node); if (!pgd) return -ENOMEM; - pud = vmemmap_pud_populate(pgd, addr, node); + p4d = vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + pud = vmemmap_pud_populate(p4d, addr, node); if (!pud) return -ENOMEM; pmd = vmemmap_pmd_populate(pud, addr, node); diff --git a/mm/swapfile.c b/mm/swapfile.c index 521ef9b6064f..178130880b90 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1517,7 +1517,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, return 0; } -static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, +static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, swp_entry_t entry, struct page *page) { @@ -1525,7 +1525,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long next; int ret; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -1537,6 +1537,26 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, return 0; } +static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + swp_entry_t entry, struct page *page) +{ + p4d_t *p4d; + unsigned long next; + int ret; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + ret = unuse_pud_range(vma, p4d, addr, next, entry, page); + if (ret) + return ret; + } while (p4d++, addr = next, addr != end); + return 0; +} + static int unuse_vma(struct vm_area_struct *vma, swp_entry_t entry, struct page *page) { @@ -1560,7 +1580,7 @@ static int unuse_vma(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - ret = unuse_pud_range(vma, pgd, addr, next, entry, page); + ret = unuse_p4d_range(vma, pgd, addr, next, entry, page); if (ret) return ret; } while (pgd++, addr = next, addr != end); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 479e631d43c2..8bcb501bce60 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -128,19 +128,22 @@ out_unlock: static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; - pmd_t *pmd = NULL; pgd = pgd_offset(mm, address); - pud = pud_alloc(mm, pgd, address); - if (pud) - /* - * Note that we didn't run this because the pmd was - * missing, the *pmd may be already established and in - * turn it may also be a trans_huge_pmd. - */ - pmd = pmd_alloc(mm, pud, address); - return pmd; + p4d = p4d_alloc(mm, pgd, address); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, address); + if (!pud) + return NULL; + /* + * Note that we didn't run this because the pmd was + * missing, the *pmd may be already established and in + * turn it may also be a trans_huge_pmd. + */ + return pmd_alloc(mm, pud, address); } #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b4024d688f38..0dd80222b20b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -86,12 +86,12 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) } while (pmd++, addr = next, addr != end); } -static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) +static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end) { pud_t *pud; unsigned long next; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_clear_huge(pud)) @@ -102,6 +102,22 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) } while (pud++, addr = next, addr != end); } +static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_clear_huge(p4d)) + continue; + if (p4d_none_or_clear_bad(p4d)) + continue; + vunmap_pud_range(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + static void vunmap_page_range(unsigned long addr, unsigned long end) { pgd_t *pgd; @@ -113,7 +129,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - vunmap_pud_range(pgd, addr, next); + vunmap_p4d_range(pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -160,13 +176,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, return 0; } -static int vmap_pud_range(pgd_t *pgd, unsigned long addr, +static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr) { pud_t *pud; unsigned long next; - pud = pud_alloc(&init_mm, pgd, addr); + pud = pud_alloc(&init_mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -177,6 +193,23 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, return 0; } +static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, pgprot_t prot, struct page **pages, int *nr) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + if (vmap_pud_range(p4d, addr, next, prot, pages, nr)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + /* * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and * will have pfns corresponding to the "pages" array. @@ -196,7 +229,7 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); + err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr); if (err) return err; } while (pgd++, addr = next, addr != end); @@ -237,6 +270,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) unsigned long addr = (unsigned long) vmalloc_addr; struct page *page = NULL; pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; /* * XXX we might need to change this if we add VIRTUAL_BUG_ON for @@ -244,21 +281,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) */ VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); - if (!pgd_none(*pgd)) { - pud_t *pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd_t *pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) { - pte_t *ptep, pte; - - ptep = pte_offset_map(pmd, addr); - pte = *ptep; - if (pte_present(pte)) - page = pte_page(pte); - pte_unmap(ptep); - } - } - } + if (pgd_none(*pgd)) + return NULL; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + return NULL; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return NULL; + + ptep = pte_offset_map(pmd, addr); + pte = *ptep; + if (pte_present(pte)) + page = pte_page(pte); + pte_unmap(ptep); return page; } EXPORT_SYMBOL(vmalloc_to_page); |