From 7b4b425897cd582897ccc38b637ce7ab5ffc5593 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 14 Apr 2015 15:42:37 -0700 Subject: arch/sh/kernel/dwarf.c: destroy mempools on cleanup dwarf_reg_pool and dwarf_frame_pool are not properly destroyed when cleaning up the dwarf unwinder. Destroy them with mempool_destroy(). Also mark dwarf_unwinder_cleanup() as __init. Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/kernel/dwarf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 67a049e75ec1..60437e9c345e 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -993,7 +993,7 @@ static struct unwinder dwarf_unwinder = { .rating = 150, }; -static void dwarf_unwinder_cleanup(void) +static void __init dwarf_unwinder_cleanup(void) { struct dwarf_fde *fde, *next_fde; struct dwarf_cie *cie, *next_cie; @@ -1009,6 +1009,10 @@ static void dwarf_unwinder_cleanup(void) rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node) kfree(cie); + if (dwarf_reg_pool) + mempool_destroy(dwarf_reg_pool); + if (dwarf_frame_pool) + mempool_destroy(dwarf_frame_pool); kmem_cache_destroy(dwarf_reg_cachep); kmem_cache_destroy(dwarf_frame_cachep); } -- cgit v1.2.3 From 1cf370c61179e01313457363b21f0859be0d8cb7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 14 Apr 2015 15:42:40 -0700 Subject: arch/sh/kernel/dwarf.c: use mempool_create_slab_pool() Mempools created for slab caches should use mempool_create_slab_pool(). Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/kernel/dwarf.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 60437e9c345e..9d209a07235e 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -1180,17 +1180,13 @@ static int __init dwarf_unwinder_init(void) sizeof(struct dwarf_reg), 0, SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); - dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ, - mempool_alloc_slab, - mempool_free_slab, - dwarf_frame_cachep); + dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ, + dwarf_frame_cachep); if (!dwarf_frame_pool) goto out; - dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ, - mempool_alloc_slab, - mempool_free_slab, - dwarf_reg_cachep); + dwarf_reg_pool = mempool_create_slab_pool(DWARF_REG_MIN_REQ, + dwarf_reg_cachep); if (!dwarf_reg_pool) goto out; -- cgit v1.2.3 From 692297d8f96887f836d9049a653ed05a71cf48fb Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Tue, 14 Apr 2015 15:44:19 -0700 Subject: watchdog: introduce the hardlockup_detector_disable() function Have kvm_guest_init() use hardlockup_detector_disable() instead of watchdog_enable_hardlockup_detector(false). Remove the watchdog_hardlockup_detector_is_enabled() and the watchdog_enable_hardlockup_detector() functions, which are no longer needed.
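For background on why this one-call replacement suffices (an illustrative sketch, not part of the patch): the preceding watchdog rework folded the enable state into a single watchdog_enabled bitmask with one bit per detector, so disabling the hard lockup detector is a single bit clear, after which watchdog_nmi_enable() bails out through its existing check. Assuming the bit definitions from kernel/watchdog.c of this era:

#define NMI_WATCHDOG_ENABLED_BIT   0
#define SOFT_WATCHDOG_ENABLED_BIT  1
#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)

/*
 * Clear only the hard lockup bit; the soft lockup detector keeps
 * running. A later watchdog_nmi_enable() call then returns early via
 * its watchdog_enabled check, which is why the ERR_PTR(-ENOENT)
 * special case in the diff below can simply be dropped.
 */
void hardlockup_detector_disable(void)
{
	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
}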
Signed-off-by: Ulrich Obergfell Signed-off-by: Don Zickus Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/kvm.c | 2 +- include/linux/nmi.h | 9 ++------- kernel/watchdog.c | 21 ++------------------- 3 files changed, 5 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e354cc6446ab..9435620062df 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -513,7 +513,7 @@ void __init kvm_guest_init(void) * can get false positives too easily, for example if the host is * overcommitted. */ - watchdog_enable_hardlockup_detector(false); + hardlockup_detector_disable(); } static noinline uint32_t __kvm_cpuid_base(void) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0426357297d5..3d46fb4708e0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -25,16 +25,11 @@ static inline void touch_nmi_watchdog(void) #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR) -extern void watchdog_enable_hardlockup_detector(bool val); -extern bool watchdog_hardlockup_detector_is_enabled(void); +extern void hardlockup_detector_disable(void); #else -static inline void watchdog_enable_hardlockup_detector(bool val) +static inline void hardlockup_detector_disable(void) { } -static inline bool watchdog_hardlockup_detector_is_enabled(void) -{ - return true; -} #endif /* diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 49d02250aaac..f2be11ab7e08 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -83,8 +83,6 @@ static unsigned long soft_lockup_nmi_warn; #ifdef CONFIG_HARDLOCKUP_DETECTOR static int hardlockup_panic = CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; - -static bool hardlockup_detector_enabled = true; /* * We may not want to enable hard lockup detection by default in all cases, * for example when running the kernel as a guest on a hypervisor. In these @@ -93,14 +91,9 @@ static bool hardlockup_detector_enabled = true; * kernel command line parameters are parsed, because otherwise it is not * possible to override this in hardlockup_panic_setup(). */ -void watchdog_enable_hardlockup_detector(bool val) -{ - hardlockup_detector_enabled = val; -} - -bool watchdog_hardlockup_detector_is_enabled(void) +void hardlockup_detector_disable(void) { - return hardlockup_detector_enabled; + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; } static int __init hardlockup_panic_setup(char *str) @@ -530,15 +523,6 @@ static int watchdog_nmi_enable(unsigned int cpu) if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) goto out; - /* - * Some kernels need to default hard lockup detection to - * 'disabled', for example a guest on a hypervisor. - */ - if (!watchdog_hardlockup_detector_is_enabled()) { - event = ERR_PTR(-ENOENT); - goto handle_err; - } - /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -553,7 +537,6 @@ static int watchdog_nmi_enable(unsigned int cpu) /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); -handle_err: /* save cpu0 error for future comparision */ if (cpu == 0 && IS_ERR(event)) cpu0_err = PTR_ERR(event); -- cgit v1.2.3 From f5d4547ad27737137fae8e67025e5664ecb8e790 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:45:36 -0700 Subject: alpha: expose number of page table levels on Kconfig level I've implemented accounting for pmd page tables as we have for pte (see mm->nr_ptes). 
It requires a new counter in mm_struct: mm->nr_pmds. But the feature doesn't make any sense if an architecture has the PMD level folded, and it would be nice to get rid of the counter in this case. The problem is that we cannot use __PAGETABLE_PMD_FOLDED in <linux/mm_types.h> due to circular dependencies: <linux/mm_types.h> -> <asm/pgtable.h> -> <linux/mm_types.h>. In most cases <asm/pgtable.h> wants to get the definition of struct page and struct vm_area_struct. I've tried to split mm_struct into a separate header file to be able to use <asm/pgtable.h> there. But it doesn't fly on some architectures, like ARM: it wants mm_struct to implement tlb flushing. I don't see how to fix it without massive de-inlining or converting a lot of inline functions to macros. This is another approach: expose the number of page table levels in use via Kconfig and use it in <linux/mm_types.h> instead of __PAGETABLE_PMD_FOLDED from <asm/pgtable.h>. This patch (of 19): We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Acked-by: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Tested-by: Guenter Roeck Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Jeff Dike Cc: Kirill A. Shutemov Cc: Koichi Yasutake Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Weinberger Cc: Russell King Cc: Thomas Gleixner Cc: Tony Luck Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index b7ff9a318c31..bf9e9d3b3792 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -76,6 +76,10 @@ config GENERIC_ISA_DMA bool default y +config PGTABLE_LEVELS + int + default 3 + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From 9f25e6ad58e1fb3b4d441e4c55635c4598a6fa94 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:45:39 -0700 Subject: arm64: expose number of page table levels on Kconfig level We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. ARM64_PGTABLE_LEVELS is renamed to PGTABLE_LEVELS and defined before sourcing init/Kconfig: arch/Kconfig will define default value and it's sourced from init/Kconfig. Signed-off-by: Kirill A.
Shutemov Acked-by: Catalin Marinas Cc: Will Deacon Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 14 +++++++------- arch/arm64/include/asm/kvm_mmu.h | 4 ++-- arch/arm64/include/asm/page.h | 4 ++-- arch/arm64/include/asm/pgalloc.h | 8 ++++---- arch/arm64/include/asm/pgtable-hwdef.h | 6 +++--- arch/arm64/include/asm/pgtable-types.h | 12 ++++++------ arch/arm64/include/asm/pgtable.h | 8 ++++---- arch/arm64/include/asm/tlb.h | 4 ++-- arch/arm64/mm/mmu.c | 4 ++-- 9 files changed, 32 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1b8e97331ffb..3f2fba996bc2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -143,6 +143,13 @@ config KERNEL_MODE_NEON config FIX_EARLYCON_MEM def_bool y +config PGTABLE_LEVELS + int + default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 + default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 + default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 + default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -413,13 +420,6 @@ config ARM64_VA_BITS default 42 if ARM64_VA_BITS_42 default 48 if ARM64_VA_BITS_48 -config ARM64_PGTABLE_LEVELS - int - default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 - default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 - default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 - default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 - config CPU_BIG_ENDIAN bool "Build big-endian kernel" help diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index bbfb600fa822..36250705dc4c 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -163,12 +163,12 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) /* * If we are concatenating first level stage-2 page tables, we would have less * than or equal to 16 pointers in the fake PGD, because that's what the - * architecture allows. In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS) + * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS) * represents the first level for the host, and we add 1 to go to the next * level (which uses contatenation) for the stage-2 tables. */ #if PTRS_PER_S2_PGD <= 16 -#define KVM_PREALLOC_LEVEL (4 - CONFIG_ARM64_PGTABLE_LEVELS + 1) +#define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1) #else #define KVM_PREALLOC_LEVEL (0) #endif diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 22b16232bd60..8fc8fa280e92 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -36,9 +36,9 @@ * for more information). 
*/ #ifdef CONFIG_ARM64_64K_PAGES -#define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS) +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) #else -#define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS - 1) +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) #endif #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index e20df38a8ff3..76420568d66a 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -28,7 +28,7 @@ #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -46,9 +46,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); } -#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -66,7 +66,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); } -#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 5f930cc9ea83..80f3d241cff8 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -21,7 +21,7 @@ /* * PMD_SHIFT determines the size a level 2 page table entry can map. */ -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 #define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) @@ -31,7 +31,7 @@ /* * PUD_SHIFT determines the size a level 1 page table entry can map. */ -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 #define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) @@ -42,7 +42,7 @@ * PGDIR_SHIFT determines the size a top-level page table entry can map * (depending on the configuration, this level can be 0, 1 or 2). 
*/ -#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_ARM64_PGTABLE_LEVELS + 3) +#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index ca9df80af896..2b1bd7e52c3b 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -38,13 +38,13 @@ typedef struct { pteval_t pte; } pte_t; #define pte_val(x) ((x).pte) #define __pte(x) ((pte_t) { (x) } ) -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 typedef struct { pmdval_t pmd; } pmd_t; #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) } ) #endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 typedef struct { pudval_t pud; } pud_t; #define pud_val(x) ((x).pud) #define __pud(x) ((pud_t) { (x) } ) @@ -64,13 +64,13 @@ typedef pteval_t pte_t; #define pte_val(x) (x) #define __pte(x) (x) -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 typedef pmdval_t pmd_t; #define pmd_val(x) (x) #define __pmd(x) (x) #endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 typedef pudval_t pud_t; #define pud_val(x) (x) #define __pud(x) (x) @@ -86,9 +86,9 @@ typedef pteval_t pgprot_t; #endif /* STRICT_MM_TYPECHECKS */ -#if CONFIG_ARM64_PGTABLE_LEVELS == 2 +#if CONFIG_PGTABLE_LEVELS == 2 #include -#elif CONFIG_ARM64_PGTABLE_LEVELS == 3 +#elif CONFIG_PGTABLE_LEVELS == 3 #include #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 800ec0e87ed9..56283f8a675c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -374,7 +374,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) */ #define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) @@ -409,9 +409,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) -#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 #define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) @@ -445,7 +445,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) #define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) -#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 53d9c354219f..3a0242c7eb8d 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -53,7 +53,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb_remove_entry(tlb, pte); } -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { @@ -62,7 +62,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, } #endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 
c6daaf6c6f97..79e01163a981 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -550,10 +550,10 @@ void vmemmap_free(unsigned long start, unsigned long end) #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; #endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; #endif -- cgit v1.2.3 From 1bcad26e9d5362d4890ab5718d729ee9cd85a493 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:45:42 -0700 Subject: arm: expose number of page table levels on Kconfig level We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Cc: Russell King Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index cf4c0c99aa25..696cf3c61e0f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -286,6 +286,11 @@ config GENERIC_BUG def_bool y depends on BUG +config PGTABLE_LEVELS + int + default 3 if ARM_LPAE + default 2 + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From 4d66bcc7cf762e82703b94d416245d4a216c79ae Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:45:45 -0700 Subject: ia64: expose number of page table levels on Kconfig level We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. We need to define PGTABLE_LEVELS before sourcing init/Kconfig: arch/Kconfig will define default value and it's sourced from init/Kconfig. Signed-off-by: Kirill A. 
Shutemov Cc: Tony Luck Cc: Fenghua Yu Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/Kconfig | 18 +++++------------- arch/ia64/include/asm/page.h | 4 ++-- arch/ia64/include/asm/pgalloc.h | 4 ++-- arch/ia64/include/asm/pgtable.h | 12 ++++++------ arch/ia64/kernel/ivt.S | 12 ++++++------ arch/ia64/kernel/machine_kexec.c | 4 ++-- 6 files changed, 23 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 074e52bf815c..4f9a6661491b 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -1,3 +1,8 @@ +config PGTABLE_LEVELS + int "Page Table Levels" if !IA64_PAGE_SIZE_64KB + range 3 4 if !IA64_PAGE_SIZE_64KB + default 3 + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -286,19 +291,6 @@ config IA64_PAGE_SIZE_64KB endchoice -choice - prompt "Page Table Levels" - default PGTABLE_3 - -config PGTABLE_3 - bool "3 Levels" - -config PGTABLE_4 - depends on !IA64_PAGE_SIZE_64KB - bool "4 Levels" - -endchoice - if IA64_HP_SIM config HZ default 32 diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h index 1f1bf144fe62..ec48bb9f95e1 100644 --- a/arch/ia64/include/asm/page.h +++ b/arch/ia64/include/asm/page.h @@ -173,7 +173,7 @@ get_order (unsigned long size) */ typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 typedef struct { unsigned long pud; } pud_t; #endif typedef struct { unsigned long pgd; } pgd_t; @@ -182,7 +182,7 @@ get_order (unsigned long size) # define pte_val(x) ((x).pte) # define pmd_val(x) ((x).pmd) -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 # define pud_val(x) ((x).pud) #endif # define pgd_val(x) ((x).pgd) diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index 5767cdfc08db..f5e70e961948 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -32,7 +32,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) quicklist_free(0, NULL, pgd); } -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 static inline void pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud) { @@ -49,7 +49,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) quicklist_free(0, NULL, pud); } #define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud) -#endif /* CONFIG_PGTABLE_4 */ +#endif /* CONFIG_PGTABLE_LEVELS == 4 */ static inline void pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd) diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 7b6f8801df57..9f3ed9ee8f13 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -99,7 +99,7 @@ #define PMD_MASK (~(PMD_SIZE-1)) #define PTRS_PER_PMD (1UL << (PTRS_PER_PTD_SHIFT)) -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 /* * Definitions for second level: * @@ -117,7 +117,7 @@ * * PGDIR_SHIFT determines what a first-level page table entry can map. 
*/ -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 #define PGDIR_SHIFT (PUD_SHIFT + (PTRS_PER_PTD_SHIFT)) #else #define PGDIR_SHIFT (PMD_SHIFT + (PTRS_PER_PTD_SHIFT)) @@ -180,7 +180,7 @@ #define __S111 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX) #define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 #define pud_ERROR(e) printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) #endif #define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) @@ -281,7 +281,7 @@ extern unsigned long VMALLOC_END; #define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) #define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET)) -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 #define pgd_none(pgd) (!pgd_val(pgd)) #define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd))) #define pgd_present(pgd) (pgd_val(pgd) != 0UL) @@ -384,7 +384,7 @@ pgd_offset (const struct mm_struct *mm, unsigned long address) here. */ #define pgd_offset_gate(mm, addr) pgd_offset_k(addr) -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 /* Find an entry in the second-level page table.. */ #define pud_offset(dir,addr) \ ((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) @@ -586,7 +586,7 @@ extern struct page *zero_page_memmap_ptr; #define __HAVE_ARCH_PGD_OFFSET_GATE -#ifndef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 3 #include #endif #include diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 18e794a57248..e42bf7a913f3 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -146,7 +146,7 @@ ENTRY(vhpt_miss) (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 shr.u r28=r22,PUD_SHIFT // shift pud index into position #else shr.u r18=r22,PMD_SHIFT // shift pmd index into position @@ -155,7 +155,7 @@ ENTRY(vhpt_miss) ld8 r17=[r17] // get *pgd (may be 0) ;; (p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr) ;; shr.u r18=r22,PMD_SHIFT // shift pmd index into position @@ -222,13 +222,13 @@ ENTRY(vhpt_miss) */ ld8 r25=[r21] // read *pte again ld8 r26=[r17] // read *pmd again -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 ld8 r19=[r28] // read *pud again #endif cmp.ne p6,p7=r0,r0 ;; cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change #endif mov r27=PAGE_SHIFT<<2 @@ -476,7 +476,7 @@ ENTRY(nested_dtlb_miss) (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 shr.u r18=r22,PUD_SHIFT // shift pud index into position #else shr.u r18=r22,PMD_SHIFT // shift pmd index into position @@ -487,7 +487,7 @@ ENTRY(nested_dtlb_miss) (p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? 
dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr) ;; -#ifdef CONFIG_PGTABLE_4 +#if CONFIG_PGTABLE_LEVELS == 4 (p7) ld8 r17=[r17] // get *pud (may be 0) shr.u r18=r22,PMD_SHIFT // shift pmd index into position ;; diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index 5151a649c96b..b72cd7a07222 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -156,9 +156,9 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_OFFSET(node_memblk_s, start_paddr); VMCOREINFO_OFFSET(node_memblk_s, size); #endif -#ifdef CONFIG_PGTABLE_3 +#if CONFIG_PGTABLE_LEVELS == 3 VMCOREINFO_CONFIG(PGTABLE_3); -#elif defined(CONFIG_PGTABLE_4) +#elif CONFIG_PGTABLE_LEVELS == 4 VMCOREINFO_CONFIG(PGTABLE_4); #endif } -- cgit v1.2.3 From 980d5b7387c6bbc9b411a3469274b3226b161e45 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:45:48 -0700 Subject: m68k: mark PMD folded and expose number of page table levels We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Core mm expects __PAGETABLE_{PUD,PMD}_FOLDED to be defined if these page table levels are folded. Signed-off-by: Kirill A. Shutemov Cc: Geert Uytterhoeven Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m68k/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 87b7c7581b1d..2dd8f63bfbbb 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -67,6 +67,10 @@ config HZ default 1000 if CLEOPATRA default 100 +config PGTABLE_LEVELS + default 2 if SUN3 || COLDFIRE + default 3 + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From a728ab52709e2fb9659d2661ac30c901df42ef02 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:45:51 -0700 Subject: mips: expose number of page table levels on Kconfig level We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Cc: Ralf Baechle Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c7a16904cd03..a9d112d2a135 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2600,6 +2600,11 @@ config STACKTRACE_SUPPORT bool default y +config PGTABLE_LEVELS + int + default 3 if 64BIT && !PAGE_SIZE_64KB + default 2 + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From f24ffde43237755b290c46306a3dd2deb1428700 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:45:54 -0700 Subject: parisc: expose number of page table levels on Kconfig level We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Cc: "James E.J.
Bottomley" Cc: Helge Deller Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/Kconfig | 5 +++++ arch/parisc/include/asm/pgalloc.h | 2 +- arch/parisc/include/asm/pgtable.h | 16 +++++++--------- arch/parisc/kernel/entry.S | 4 ++-- arch/parisc/kernel/head.S | 4 ++-- arch/parisc/mm/init.c | 2 +- 6 files changed, 18 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 8014727a2743..c36546959e86 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -103,6 +103,11 @@ config ARCH_MAY_HAVE_PC_FDC depends on BROKEN default y +config PGTABLE_LEVELS + int + default 3 if 64BIT && PARISC_PAGE_SIZE_4KB + default 2 + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index d17437238a2c..1ba29369257c 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -51,7 +51,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_pages((unsigned long)pgd, PGD_ALLOC_ORDER); } -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 /* Three Level Page Table Support for pmd's */ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 15207b9362bf..0a183756d6ec 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -68,13 +68,11 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); #define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */ #define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) -#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB) -#define PT_NLEVELS 3 +#if CONFIG_PGTABLE_LEVELS == 3 #define PGD_ORDER 1 /* Number of pages per pgd */ #define PMD_ORDER 1 /* Number of pages per pmd */ #define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */ #else -#define PT_NLEVELS 2 #define PGD_ORDER 1 /* Number of pages per pgd */ #define PGD_ALLOC_ORDER PGD_ORDER #endif @@ -93,7 +91,7 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); #define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 #define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) #else #define __PAGETABLE_PMD_FOLDED @@ -277,7 +275,7 @@ extern unsigned long *empty_zero_page; #define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK) #define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 /* The first entry of the permanent pmd is not there if it contains * the gateway marker */ #define pmd_none(x) (!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED) @@ -287,7 +285,7 @@ extern unsigned long *empty_zero_page; #define pmd_bad(x) (!(pmd_flag(x) & PxD_FLAG_VALID)) #define pmd_present(x) (pmd_flag(x) & PxD_FLAG_PRESENT) static inline void pmd_clear(pmd_t *pmd) { -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) /* This is the entry pointing to the permanent pmd * attached to the pgd; cannot clear it */ @@ -299,7 +297,7 @@ static inline void pmd_clear(pmd_t *pmd) { -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 #define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd))) #define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd)) @@ -309,7 +307,7 @@ static inline void pmd_clear(pmd_t *pmd) { #define pgd_bad(x) (!(pgd_flag(x) & PxD_FLAG_VALID)) #define 
pgd_present(x) (pgd_flag(x) & PxD_FLAG_PRESENT) static inline void pgd_clear(pgd_t *pgd) { -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED) /* This is the permanent pmd attached to the pgd; cannot * free it */ @@ -393,7 +391,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) /* Find an entry in the second-level page table.. */ -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 #define pmd_offset(dir,address) \ ((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) #else diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 2ab16bb160a8..75819617f93b 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -398,7 +398,7 @@ * can address up to 1TB */ .macro L2_ptep pmd,pte,index,va,fault -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index #else # if defined(CONFIG_64BIT) @@ -436,7 +436,7 @@ * all ILP32 processes and all the kernel for machines with * under 4GB of memory) */ .macro L3_ptep pgd,pte,index,va,fault -#if PT_NLEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */ +#if CONFIG_PGTABLE_LEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */ extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index copy %r0,\pte extrd,u,*= \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0 diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index d4dc588c0dc1..e7d64527aff9 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -74,7 +74,7 @@ $bss_loop: mtctl %r4,%cr24 /* Initialize kernel root pointer */ mtctl %r4,%cr25 /* Initialize user root pointer */ -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 /* Set pmd in pgd */ load32 PA(pmd0),%r5 shrd %r5,PxD_VALUE_SHIFT,%r3 @@ -97,7 +97,7 @@ $bss_loop: stw %r3,0(%r4) ldo (PAGE_SIZE >> PxD_VALUE_SHIFT)(%r3),%r3 addib,> -1,%r1,1b -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 ldo ASM_PMD_ENTRY_SIZE(%r4),%r4 #else ldo ASM_PGD_ENTRY_SIZE(%r4),%r4 diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 15dbe81cf5f3..c229427fa546 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -34,7 +34,7 @@ extern int data_start; extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ -#if PT_NLEVELS == 3 +#if CONFIG_PGTABLE_LEVELS == 3 /* NOTE: This layout exactly conforms to the hybrid L2/L3 page table layout * with the first pmd adjacent to the pgd and below it. gcc doesn't actually * guarantee that global objects will be laid out in memory in the same order -- cgit v1.2.3 From 06ef42a16f2dd8480ada1be3a549e12b02c45915 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:45:57 -0700 Subject: powerpc: expose number of page table levels on Kconfig level We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. 
Shutemov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 22b0940494bb..91ad76f30d18 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -297,6 +297,12 @@ config ZONE_DMA32 bool default y if PPC64 +config PGTABLE_LEVELS + int + default 2 if !PPC64 + default 3 if PPC_64K_PAGES + default 4 + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From c81956c9cd587d36b87d4ab37c92016ebb79b517 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:46:00 -0700 Subject: s390: expose number of page table levels We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Core mm expects __PAGETABLE_{PUD,PMD}_FOLDED to be defined if these page table levels are folded. Signed-off-by: Kirill A. Shutemov Cc: Martin Schwidefsky Cc: Heiko Carstens Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 373cd5badf1c..f6aebcb7a0f8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -155,6 +155,11 @@ config S390 config SCHED_OMIT_FRAME_POINTER def_bool y +config PGTABLE_LEVELS + int + default 4 if 64BIT + default 2 + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From 69543d639938fc71b3df69d32ecccc58beb7e578 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:46:02 -0700 Subject: sh: expose number of page table levels We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Cc: linux-sh@vger.kernel.org Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index eb4ef274ae9b..50057fed819d 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -162,6 +162,10 @@ config NEED_DMA_MAP_STATE config NEED_SG_DMA_LENGTH def_bool y +config PGTABLE_LEVELS + default 3 if X2TLB + default 2 + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From 81a2936c1a98dd007c510dc523c5bbdce263878b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:46:05 -0700 Subject: sparc: expose number of page table levels We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Cc: "David S. Miller" Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index efb00ec75805..e49502acbab4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -146,6 +146,10 @@ config GENERIC_ISA_DMA config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y if SPARC64 +config PGTABLE_LEVELS + default 4 if 64BIT + default 3 + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From 909d45e62801ea77422d478519baa7d2287c77f7 Mon Sep 17 00:00:00 2001 From: "Kirill A.
Shutemov" Date: Tue, 14 Apr 2015 15:46:08 -0700 Subject: tile: expose number of page table levels We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Acked-by: Chris Metcalf Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 7cca41842a9e..0142d578b5a8 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -147,6 +147,11 @@ config ARCH_DEFCONFIG default "arch/tile/configs/tilepro_defconfig" if !TILEGX default "arch/tile/configs/tilegx_defconfig" if TILEGX +config PGTABLE_LEVELS + int + default 3 if 64BIT + default 2 + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From 6b8ce2a1a464526335672c33cbf3cb9fc638efff Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:46:11 -0700 Subject: um: expose number of page table levels We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Acked-by: Richard Weinberger Cc: Jeff Dike Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Kconfig.um | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um index a7520c90f62d..5dbfe3d9107c 100644 --- a/arch/um/Kconfig.um +++ b/arch/um/Kconfig.um @@ -155,3 +155,8 @@ config MMAPPER config NO_DMA def_bool y + +config PGTABLE_LEVELS + int + default 3 if 3_LEVEL_PGTABLES + default 2 -- cgit v1.2.3 From 982333683385343d8d2db9a1df69c98406f42687 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:46:14 -0700 Subject: x86: expose number of page table levels on Kconfig level We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 6 ++++++ arch/x86/include/asm/paravirt.h | 8 ++++---- arch/x86/include/asm/paravirt_types.h | 8 ++++---- arch/x86/include/asm/pgalloc.h | 8 ++++---- arch/x86/include/asm/pgtable-2level_types.h | 1 - arch/x86/include/asm/pgtable-3level_types.h | 2 -- arch/x86/include/asm/pgtable.h | 8 ++++---- arch/x86/include/asm/pgtable_64_types.h | 1 - arch/x86/include/asm/pgtable_types.h | 4 ++-- arch/x86/kernel/paravirt.c | 6 +++--- arch/x86/mm/pgtable.c | 14 +++++++------- arch/x86/xen/mmu.c | 14 +++++++------- include/trace/events/xen.h | 2 +- 13 files changed, 42 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index faff6934c05a..3e3aaad23414 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -277,6 +277,12 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y +config PGTABLE_LEVELS + int + default 4 if X86_64 + default 3 if X86_PAE + default 2 + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5f6051d5d139..8957810ad7d1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -545,7 +545,7 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); } -#if PAGETABLE_LEVELS >= 3 +#if CONFIG_PGTABLE_LEVELS >= 3 static inline pmd_t __pmd(pmdval_t val) { pmdval_t ret; @@ -585,7 +585,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud) PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, val); } -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 static inline pud_t __pud(pudval_t val) { pudval_t ret; @@ -636,9 +636,9 @@ static inline void pud_clear(pud_t *pudp) set_pud(pudp, __pud(0)); } -#endif /* PAGETABLE_LEVELS == 4 */ +#endif /* CONFIG_PGTABLE_LEVELS == 4 */ -#endif /* PAGETABLE_LEVELS >= 3 */ +#endif /* CONFIG_PGTABLE_LEVELS >= 3 */ #ifdef CONFIG_X86_PAE /* Special-case pte-setting operations for PAE, which can't update a diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7549b8b369e4..f7b0b5c112f2 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -294,7 +294,7 @@ struct pv_mmu_ops { struct paravirt_callee_save pgd_val; struct paravirt_callee_save make_pgd; -#if PAGETABLE_LEVELS >= 3 +#if CONFIG_PGTABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); void (*pte_clear)(struct mm_struct *mm, unsigned long addr, @@ -308,13 +308,13 @@ struct pv_mmu_ops { struct paravirt_callee_save pmd_val; struct paravirt_callee_save make_pmd; -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 struct paravirt_callee_save pud_val; struct paravirt_callee_save make_pud; void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); -#endif /* PAGETABLE_LEVELS == 4 */ -#endif /* PAGETABLE_LEVELS >= 3 */ +#endif /* CONFIG_PGTABLE_LEVELS == 4 */ +#endif /* CONFIG_PGTABLE_LEVELS >= 3 */ struct pv_lazy_ops lazy_mode; diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index c4412e972bbd..bf7f8b55b0f9 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -77,7 +77,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, #define pmd_pgtable(pmd) pmd_page(pmd) -#if PAGETABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page 
*page; @@ -116,7 +116,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) } #endif /* CONFIG_X86_PAE */ -#if PAGETABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); @@ -142,7 +142,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, ___pud_free_tlb(tlb, pud); } -#endif /* PAGETABLE_LEVELS > 3 */ -#endif /* PAGETABLE_LEVELS > 2 */ +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ #endif /* _ASM_X86_PGALLOC_H */ diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index daacc23e3fb9..392576433e77 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -17,7 +17,6 @@ typedef union { #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 -#define PAGETABLE_LEVELS 2 /* * traditional i386 two-level paging structure: diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 1bd5876c8649..bcc89625ebe5 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -24,8 +24,6 @@ typedef union { #define SHARED_KERNEL_PMD 1 #endif -#define PAGETABLE_LEVELS 3 - /* * PGDIR_SHIFT determines what a top-level page table entry can map */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a0c35bf6cb92..fe57e7a98839 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -551,7 +551,7 @@ static inline unsigned long pages_to_mb(unsigned long npg) return npg >> (20 - PAGE_SHIFT); } -#if PAGETABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { return native_pud_val(pud) == 0; @@ -594,9 +594,9 @@ static inline int pud_large(pud_t pud) { return 0; } -#endif /* PAGETABLE_LEVELS > 2 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ -#if PAGETABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 static inline int pgd_present(pgd_t pgd) { return pgd_flags(pgd) & _PAGE_PRESENT; @@ -633,7 +633,7 @@ static inline int pgd_none(pgd_t pgd) { return !native_pgd_val(pgd); } -#endif /* PAGETABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 602b6028c5b6..e6844dfb4471 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -20,7 +20,6 @@ typedef struct { pteval_t pte; } pte_t; #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 -#define PAGETABLE_LEVELS 4 /* * PGDIR_SHIFT determines what a top-level page table entry can map diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8c7c10802e9c..78f0c8cbe316 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -234,7 +234,7 @@ static inline pgdval_t pgd_flags(pgd_t pgd) return native_pgd_val(pgd) & PTE_FLAGS_MASK; } -#if PAGETABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 typedef struct { pudval_t pud; } pud_t; static inline pud_t native_make_pud(pmdval_t val) @@ -255,7 +255,7 @@ static inline pudval_t native_pud_val(pud_t pud) } #endif -#if PAGETABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 typedef struct { pmdval_t pmd; } pmd_t; static inline pmd_t native_make_pmd(pmdval_t val) diff --git a/arch/x86/kernel/paravirt.c 
b/arch/x86/kernel/paravirt.c index 548d25f00c90..c614dd492f5f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -443,7 +443,7 @@ struct pv_mmu_ops pv_mmu_ops = { .ptep_modify_prot_start = __ptep_modify_prot_start, .ptep_modify_prot_commit = __ptep_modify_prot_commit, -#if PAGETABLE_LEVELS >= 3 +#if CONFIG_PGTABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE .set_pte_atomic = native_set_pte_atomic, .pte_clear = native_pte_clear, @@ -454,13 +454,13 @@ struct pv_mmu_ops pv_mmu_ops = { .pmd_val = PTE_IDENT, .make_pmd = PTE_IDENT, -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 .pud_val = PTE_IDENT, .make_pud = PTE_IDENT, .set_pgd = native_set_pgd, #endif -#endif /* PAGETABLE_LEVELS >= 3 */ +#endif /* CONFIG_PGTABLE_LEVELS >= 3 */ .pte_val = PTE_IDENT, .pgd_val = PTE_IDENT, diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5a7e5252c878..b28edfecbdfe 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -58,7 +58,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) tlb_remove_page(tlb, pte); } -#if PAGETABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { struct page *page = virt_to_page(pmd); @@ -74,14 +74,14 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) tlb_remove_page(tlb, page); } -#if PAGETABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); tlb_remove_page(tlb, virt_to_page(pud)); } -#endif /* PAGETABLE_LEVELS > 3 */ -#endif /* PAGETABLE_LEVELS > 2 */ +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ static inline void pgd_list_add(pgd_t *pgd) { @@ -117,9 +117,9 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) /* If the pgd points to a shared pagetable level (either the ptes in non-PAE, or shared PMD in PAE), then just copy the references from swapper_pg_dir. */ - if (PAGETABLE_LEVELS == 2 || - (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || - PAGETABLE_LEVELS == 4) { + if (CONFIG_PGTABLE_LEVELS == 2 || + (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || + CONFIG_PGTABLE_LEVELS == 4) { clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index adca9e2b6553..65083ad63b6f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -502,7 +502,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 __visible pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); @@ -589,7 +589,7 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val) xen_mc_issue(PARAVIRT_LAZY_MMU); } -#endif /* PAGETABLE_LEVELS == 4 */ +#endif /* CONFIG_PGTABLE_LEVELS == 4 */ /* * (Yet another) pagetable walker. 
This one is intended for pinning a @@ -1628,7 +1628,7 @@ static void xen_release_pmd(unsigned long pfn) xen_release_ptpage(pfn, PT_PMD); } -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PUD); @@ -2046,7 +2046,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 pv_mmu_ops.set_pgd = xen_set_pgd; #endif @@ -2056,7 +2056,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.alloc_pmd = xen_alloc_pmd; pv_mmu_ops.release_pte = xen_release_pte; pv_mmu_ops.release_pmd = xen_release_pmd; -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_mmu_ops.release_pud = xen_release_pud; #endif @@ -2122,14 +2122,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 .pud_val = PV_CALLEE_SAVE(xen_pud_val), .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, .alloc_pud = xen_alloc_pmd_init, .release_pud = xen_release_pmd_init, -#endif /* PAGETABLE_LEVELS == 4 */ +#endif /* CONFIG_PGTABLE_LEVELS == 4 */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index d06b6da5c1e3..bce990f5a35d 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -224,7 +224,7 @@ TRACE_EVENT(xen_mmu_pmd_clear, TP_printk("pmdp %p", __entry->pmdp) ); -#if PAGETABLE_LEVELS >= 4 +#if CONFIG_PGTABLE_LEVELS >= 4 TRACE_EVENT(xen_mmu_set_pud, TP_PROTO(pud_t *pudp, pud_t pudval), -- cgit v1.2.3 From 235a8f0286d3de5754322e9b4abd472ed4d57209 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:46:17 -0700 Subject: mm: define default PGTABLE_LEVELS to two By this time all architectures which support more than two page table levels should be covered. This patch adds a default definition of PGTABLE_LEVELS equal to 2. We also add an assert to detect inconsistency between CONFIG_PGTABLE_LEVELS and __PAGETABLE_PMD_FOLDED/__PAGETABLE_PUD_FOLDED. Signed-off-by: Kirill A. Shutemov Tested-by: Guenter Roeck Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Jeff Dike Cc: Kirill A. Shutemov Cc: Koichi Yasutake Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Weinberger Cc: Russell King Cc: Thomas Gleixner Cc: Tony Luck Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 4 ++++ include/asm-generic/pgtable.h | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 05d7a8a458d5..a9c95d36ba70 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -484,6 +484,10 @@ config HAVE_IRQ_EXIT_ON_IRQ_STACK This spares a stack switch and improves cache usage on softirq processing.
+config PGTABLE_LEVELS + int + default 2 + # # ABI hall of shame # diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 4d46085c1b90..1f9f5da6828f 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -7,6 +7,11 @@ #include #include +#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \ + CONFIG_PGTABLE_LEVELS +#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED +#endif + /* * On almost all architectures and configurations, 0 can be used as the * upper ceiling to free_pgtables(): on many architectures it has the same -- cgit v1.2.3 From f91e8d6d76866eafba255864ca617438cec268de Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 14 Apr 2015 15:47:11 -0700 Subject: sparc: clarify __GFP_NOFAIL allocation Commit 920c3ed74134 ("[SPARC64]: Add basic infrastructure for MD add/remove notification") has added __GFP_NOFAIL for the allocation request but it hasn't mentioned why this strict requirement is really needed. The code was handling an allocation failure and propagated it properly up the callchain, so it is not clear why it is needed. Dave has clarified the intention when I tried to remove the flag as not being necessary: : It is a serious failure. : : If we miss an MDESC update due to this allocation failure, the update : is not an event which gets retransmitted so we will lose the updated : machine description forever. : : We really need this allocation to succeed. So add a comment to clarify the nofail flag and get rid of the failure check because __GFP_NOFAIL allocation doesn't fail. Signed-off-by: Michal Hocko Cc: David Rientjes Cc: Johannes Weiner Cc: Dave Chinner Cc: "Theodore Ts'o" Cc: Mel Gorman Cc: Tetsuo Handa Cc: "David S. Miller" Cc: Vipul Pandya Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/kernel/mdesc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 99632a87e697..26c80e18d7b1 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -130,26 +130,26 @@ static struct mdesc_mem_ops memblock_mdesc_ops = { static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) { unsigned int handle_size; + struct mdesc_handle *hp; + unsigned long addr; void *base; handle_size = (sizeof(struct mdesc_handle) - sizeof(struct mdesc_hdr) + mdesc_size); + /* + * Allocation has to succeed because mdesc update would be missed + * and such events are not retransmitted. + */ base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL); - if (base) { - struct mdesc_handle *hp; - unsigned long addr; - - addr = (unsigned long)base; - addr = (addr + 15UL) & ~15UL; - hp = (struct mdesc_handle *) addr; + addr = (unsigned long)base; + addr = (addr + 15UL) & ~15UL; + hp = (struct mdesc_handle *) addr; - mdesc_handle_init(hp, handle_size, base); - return hp; - } + mdesc_handle_init(hp, handle_size, base); - return NULL; + return hp; } static void mdesc_kfree(struct mdesc_handle *hp) -- cgit v1.2.3 From 0ddab1d2ed664c85c95488eef569786a84aedf37 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 14 Apr 2015 15:47:20 -0700 Subject: lib/ioremap.c: add huge I/O map capability interfaces Add ioremap_pud_enabled() and ioremap_pmd_enabled(), which return 1 when I/O mappings with pud/pmd are enabled in the kernel.
ioremap_huge_init() calls arch_ioremap_pud_supported() and arch_ioremap_pmd_supported() to initialize the capabilities at boot-time. A new kernel option "nohugeiomap" is also added, so that user can disable the huge I/O map capabilities when necessary. Signed-off-by: Toshi Kani Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Arnd Bergmann Cc: Dave Hansen Cc: Robert Elliott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 2 ++ arch/Kconfig | 3 +++ include/linux/io.h | 8 ++++++++ init/main.c | 2 ++ lib/ioremap.c | 37 +++++++++++++++++++++++++++++++++++++ 5 files changed, 52 insertions(+) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 71eecb263250..b1fa70907ccf 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2323,6 +2323,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. register save and restore. The kernel will only save legacy floating-point registers on task switch. + nohugeiomap [KNL,x86] Disable kernel huge I/O mappings. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. diff --git a/arch/Kconfig b/arch/Kconfig index a9c95d36ba70..c88c23f0a1da 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -446,6 +446,9 @@ config HAVE_IRQ_TIME_ACCOUNTING config HAVE_ARCH_TRANSPARENT_HUGEPAGE bool +config HAVE_ARCH_HUGE_VMAP + bool + config HAVE_ARCH_SOFT_DIRTY bool diff --git a/include/linux/io.h b/include/linux/io.h index fa02e55e5a2e..4cc299c598e0 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -38,6 +38,14 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end, } #endif +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +void __init ioremap_huge_init(void); +int arch_ioremap_pud_supported(void); +int arch_ioremap_pmd_supported(void); +#else +static inline void ioremap_huge_init(void) { } +#endif + /* * Managed iomap interface */ diff --git a/init/main.c b/init/main.c index 4a6974e67839..f6dd8fe1f22c 100644 --- a/init/main.c +++ b/init/main.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -484,6 +485,7 @@ static void __init mm_init(void) percpu_init_late(); pgtable_init(); vmalloc_init(); + ioremap_huge_init(); } asmlinkage __visible void __init start_kernel(void) diff --git a/lib/ioremap.c b/lib/ioremap.c index 0c9216c48762..2008652c9a1f 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -13,6 +13,43 @@ #include #include +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +int __read_mostly ioremap_pud_capable; +int __read_mostly ioremap_pmd_capable; +int __read_mostly ioremap_huge_disabled; + +static int __init set_nohugeiomap(char *str) +{ + ioremap_huge_disabled = 1; + return 0; +} +early_param("nohugeiomap", set_nohugeiomap); + +void __init ioremap_huge_init(void) +{ + if (!ioremap_huge_disabled) { + if (arch_ioremap_pud_supported()) + ioremap_pud_capable = 1; + if (arch_ioremap_pmd_supported()) + ioremap_pmd_capable = 1; + } +} + +static inline int ioremap_pud_enabled(void) +{ + return ioremap_pud_capable; +} + +static inline int ioremap_pmd_enabled(void) +{ + return ioremap_pmd_capable; +} + +#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int ioremap_pud_enabled(void) { return 0; } +static inline int ioremap_pmd_enabled(void) { return 0; } +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ + static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, 
unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { -- cgit v1.2.3 From 5d72b4fba40ef4b3f7a1a11d6aacc85d9af81561 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 14 Apr 2015 15:47:29 -0700 Subject: x86, mm: support huge I/O mapping capability I/F Implement huge I/O mapping capability interfaces for ioremap() on x86. IOREMAP_MAX_ORDER is defined to PUD_SHIFT on x86/64 and PMD_SHIFT on x86/32, which overrides the default value defined in . Signed-off-by: Toshi Kani Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Arnd Bergmann Cc: Dave Hansen Cc: Robert Elliott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_types.h | 2 ++ arch/x86/mm/ioremap.c | 23 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index f97fbe3abb67..c7c712f2648b 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -40,8 +40,10 @@ #ifdef CONFIG_X86_64 #include +#define IOREMAP_MAX_ORDER (PUD_SHIFT) #else #include +#define IOREMAP_MAX_ORDER (PMD_SHIFT) #endif /* CONFIG_X86_64 */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index fdf617c00e2f..5ead4d6cf3a7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -67,8 +67,13 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, /* * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. + * address space. It transparently creates kernel huge I/O mapping when + * the physical address is aligned by a huge page size (1GB or 2MB) and + * the requested size is at least the huge page size. + * + * NOTE: MTRRs can override PAT memory types with a 4KB granularity. + * Therefore, the mapping code falls back to use a smaller page toward 4KB + * when a mapping range is covered by non-WB type of MTRRs. * * NOTE! We need to allow non-page-aligned mappings too: we will obviously * have to convert them into an offset in a page-aligned mapping, but the @@ -326,6 +331,20 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); +int arch_ioremap_pud_supported(void) +{ +#ifdef CONFIG_X86_64 + return cpu_has_gbpages; +#else + return 0; +#endif +} + +int arch_ioremap_pmd_supported(void) +{ + return cpu_has_pse; +} + /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem * access -- cgit v1.2.3 From 6b6378355b925050eb6fa966742d8c2d65ff0d83 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 14 Apr 2015 15:47:32 -0700 Subject: x86, mm: support huge KVA mappings on x86 Implement huge KVA mapping interfaces on x86. On x86, MTRRs can override PAT memory types with a 4KB granularity. When using a huge page, MTRRs can override the memory type of the huge page, which may lead to a performance penalty. The processor can also behave in an undefined manner if a huge page is mapped to a memory range that MTRRs have mapped with multiple different memory types. Therefore, the mapping code falls back to using a smaller page size, down to 4KB, when a mapping range is covered by non-WB type of MTRRs. The WB type of MTRRs has no effect on the PAT memory types. pud_set_huge() and pmd_set_huge() call mtrr_type_lookup() to see if a given range is covered by MTRRs. MTRR_TYPE_WRBACK indicates that the range is either covered by WB or not covered and the MTRR default value is set to WB. 0xFF indicates that MTRRs are disabled.
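In code form, that return convention reduces the fallback decision in pud_set_huge() and pmd_set_huge() to a single predicate. The following condensation only restates the rule (huge_mapping_allowed() is a hypothetical helper; the real checks are open-coded in the diff below):

static int huge_mapping_allowed(u64 addr, u64 size)
{
        u8 mtrr = mtrr_type_lookup(addr, addr + size);

        /*
         * Allow a huge mapping only when the range is effectively
         * write-back: MTRR_TYPE_WRBACK, or 0xFF when MTRRs are
         * disabled altogether. Anything else forces the 4KB fallback.
         */
        return mtrr == MTRR_TYPE_WRBACK || mtrr == 0xFF;
}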
HAVE_ARCH_HUGE_VMAP is selected when X86_64 or X86_32 with X86_PAE is set. X86_32 without X86_PAE is not supported since such a config is unlikely to benefit from this feature, and an issue was found in testing. [fengguang.wu@intel.com: ioremap_pud_capable can be static] Signed-off-by: Toshi Kani Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Arnd Bergmann Cc: Dave Hansen Cc: Robert Elliott Signed-off-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/mm/pgtable.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ lib/ioremap.c | 6 ++--- 3 files changed, 69 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3e3aaad23414..0f948cefaeb1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -99,6 +99,7 @@ config X86 select IRQ_FORCED_THREADING select HAVE_BPF_JIT if X86_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE) select ARCH_HAS_SG_CHAIN select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index b28edfecbdfe..0b97d2c75df3 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -4,6 +4,7 @@ #include #include #include +#include #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO @@ -560,3 +561,67 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, { __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); } + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) +{ + u8 mtrr; + + /* + * Do not use a huge page when the range is covered by non-WB type + * of MTRRs. + */ + mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); + if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + return 0; + + prot = pgprot_4k_2_large(prot); + + set_pte((pte_t *)pud, pfn_pte( + (u64)addr >> PAGE_SHIFT, + __pgprot(pgprot_val(prot) | _PAGE_PSE))); + + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) +{ + u8 mtrr; + + /* + * Do not use a huge page when the range is covered by non-WB type + * of MTRRs.
+ */ + mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); + if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + return 0; + + prot = pgprot_4k_2_large(prot); + + set_pte((pte_t *)pmd, pfn_pte( + (u64)addr >> PAGE_SHIFT, + __pgprot(pgprot_val(prot) | _PAGE_PSE))); + + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (pud_large(*pud)) { + pud_clear(pud); + return 1; + } + + return 0; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (pmd_large(*pmd)) { + pmd_clear(pmd); + return 1; + } + + return 0; +} +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ diff --git a/lib/ioremap.c b/lib/ioremap.c index be249062ba6e..86c8911b0e3a 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -14,9 +14,9 @@ #include #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -int __read_mostly ioremap_pud_capable; -int __read_mostly ioremap_pmd_capable; -int __read_mostly ioremap_huge_disabled; +static int __read_mostly ioremap_pud_capable; +static int __read_mostly ioremap_pmd_capable; +static int __read_mostly ioremap_huge_disabled; static int __init set_nohugeiomap(char *str) { -- cgit v1.2.3 From fbbc400f3924ce095b466c776dc294727ec0a202 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:47:41 -0700 Subject: arm: factor out mmap ASLR into mmap_rnd To address the "offset2lib" ASLR weakness[1], this separates ET_DYN ASLR from mmap ASLR, as already done on s390. The architectures that are already randomizing mmap (arm, arm64, mips, powerpc, s390, and x86), have their various forms of arch_mmap_rnd() made available via the new CONFIG_ARCH_HAS_ELF_RANDOMIZE. For these architectures, arch_randomize_brk() is collapsed as well. This is an alternative to the solutions in: https://lkml.org/lkml/2015/2/23/442 I've been able to test x86 and arm, and the buildbot (so far) seems happy with building the rest. [1] http://cybersecurity.upv.es/attacks/offset2lib/offset2lib.html This patch (of 10): In preparation for splitting out ET_DYN ASLR, this moves the ASLR calculations for mmap on ARM into a separate routine, similar to x86. This also removes the redundant check of personality (PF_RANDOMIZE is already set before calling arch_pick_mmap_layout). Signed-off-by: Kees Cook Cc: Hector Marco-Gisbert Cc: Russell King Reviewed-by: Ingo Molnar Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Alexander Viro Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: "David A. 
Long" Cc: Andrey Ryabinin Cc: Arun Chandran Cc: Yann Droneaud Cc: Min-Hua Chen Cc: Paul Burton Cc: Alex Smith Cc: Markos Chandras Cc: Vineeth Vijayan Cc: Jeff Bailey Cc: Michael Holzheu Cc: Ben Hutchings Cc: Behan Webster Cc: Ismael Ripoll Cc: Jan-Simon Mller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/mmap.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 5e85ed371364..15a8160096b3 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -169,14 +169,22 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } +static unsigned long mmap_rnd(void) +{ + unsigned long rnd; + + /* 8 bits of randomness in 20 address space bits */ + rnd = (unsigned long)get_random_int() % (1 << 8); + + return rnd << PAGE_SHIFT; +} + void arch_pick_mmap_layout(struct mm_struct *mm) { unsigned long random_factor = 0UL; - /* 8 bits of randomness in 20 address space bits */ - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) - random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT; + if (current->flags & PF_RANDOMIZE) + random_factor = mmap_rnd(); if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; -- cgit v1.2.3 From 82168140bc4cec7ec9bad39705518541149ff8b7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:47:45 -0700 Subject: x86: standardize mmap_rnd() usage In preparation for splitting out ET_DYN ASLR, this refactors the use of mmap_rnd() to be used similarly to arm, and extracts the checking of PF_RANDOMIZE. Signed-off-by: Kees Cook Reviewed-by: Ingo Molnar Cc: Oleg Nesterov Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/mmap.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index df4552bd239e..ebfa52030d5c 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -67,22 +67,21 @@ static int mmap_is_legacy(void) static unsigned long mmap_rnd(void) { - unsigned long rnd = 0; + unsigned long rnd; /* - * 8 bits of randomness in 32bit mmaps, 20 address space bits - * 28 bits of randomness in 64bit mmaps, 40 address space bits - */ - if (current->flags & PF_RANDOMIZE) { - if (mmap_is_ia32()) - rnd = get_random_int() % (1<<8); - else - rnd = get_random_int() % (1<<28); - } + * 8 bits of randomness in 32bit mmaps, 20 address space bits + * 28 bits of randomness in 64bit mmaps, 40 address space bits + */ + if (mmap_is_ia32()) + rnd = (unsigned long)get_random_int() % (1<<8); + else + rnd = (unsigned long)get_random_int() % (1<<28); + return rnd << PAGE_SHIFT; } -static unsigned long mmap_base(void) +static unsigned long mmap_base(unsigned long rnd) { unsigned long gap = rlimit(RLIMIT_STACK); @@ -91,19 +90,19 @@ static unsigned long mmap_base(void) else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); + return PAGE_ALIGN(TASK_SIZE - gap - rnd); } /* * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 * does, but not when emulating X86_32 */ -static unsigned long mmap_legacy_base(void) +static unsigned long mmap_legacy_base(unsigned long rnd) { if (mmap_is_ia32()) return TASK_UNMAPPED_BASE; else - return TASK_UNMAPPED_BASE + mmap_rnd(); + return TASK_UNMAPPED_BASE + rnd; } /* @@ -112,13 +111,18 @@ static unsigned long mmap_legacy_base(void) */ 
void arch_pick_mmap_layout(struct mm_struct *mm) { - mm->mmap_legacy_base = mmap_legacy_base(); - mm->mmap_base = mmap_base(); + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) + random_factor = mmap_rnd(); + + mm->mmap_legacy_base = mmap_legacy_base(random_factor); if (mmap_is_legacy()) { mm->mmap_base = mm->mmap_legacy_base; mm->get_unmapped_area = arch_get_unmapped_area; } else { + mm->mmap_base = mmap_base(random_factor); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } -- cgit v1.2.3 From dd04cff1dceab18226853b555cf07914648a235f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:47:48 -0700 Subject: arm64: standardize mmap_rnd() usage In preparation for splitting out ET_DYN ASLR, this refactors the use of mmap_rnd() to be used similarly to arm and x86. This additionally enables mmap ASLR on legacy mmap layouts, which appeared to be missing on arm64, and was already supported on arm. Additionally removes a copy/pasted declaration of an unused function. Signed-off-by: Kees Cook Cc: Russell King Cc: Catalin Marinas Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/elf.h | 1 - arch/arm64/mm/mmap.c | 18 +++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 1f65be393139..f724db00b235 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -125,7 +125,6 @@ typedef struct user_fpsimd_state elf_fpregset_t; * the loader. We need to make sure that it is out of the way of the program * that it will "exec", and that there is sufficient room for the brk. */ -extern unsigned long randomize_et_dyn(unsigned long base); #define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) /* diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 54922d1275b8..ba776c01b552 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -49,15 +49,14 @@ static int mmap_is_legacy(void) static unsigned long mmap_rnd(void) { - unsigned long rnd = 0; + unsigned long rnd; - if (current->flags & PF_RANDOMIZE) - rnd = (long)get_random_int() & STACK_RND_MASK; + rnd = (unsigned long)get_random_int() & STACK_RND_MASK; return rnd << PAGE_SHIFT; } -static unsigned long mmap_base(void) +static unsigned long mmap_base(unsigned long rnd) { unsigned long gap = rlimit(RLIMIT_STACK); @@ -66,7 +65,7 @@ static unsigned long mmap_base(void) else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(STACK_TOP - gap - mmap_rnd()); + return PAGE_ALIGN(STACK_TOP - gap - rnd); } /* @@ -75,15 +74,20 @@ static unsigned long mmap_base(void) */ void arch_pick_mmap_layout(struct mm_struct *mm) { + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) + random_factor = mmap_rnd(); + /* * Fall back to the standard layout if the personality bit is set, or * if the expected stack growth is unlimited: */ if (mmap_is_legacy()) { - mm->mmap_base = TASK_UNMAPPED_BASE; + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(); + mm->mmap_base = mmap_base(random_factor); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } -- cgit v1.2.3 From 1f0569df0b0285e7ec2432d804a4921b06a61618 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:47:51 -0700 Subject: mips: extract logic for mmap_rnd() In preparation for splitting out ET_DYN ASLR, extract the mmap ASLR selection into a separate 
function. Signed-off-by: Kees Cook Reviewed-by: Ingo Molnar Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/mmap.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index f1baadd56e82..9a4f1f5c1f0e 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -142,18 +142,26 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, addr0, len, pgoff, flags, DOWN); } +static unsigned long mmap_rnd(void) +{ + unsigned long rnd; + + rnd = (unsigned long)get_random_int(); + rnd <<= PAGE_SHIFT; + if (TASK_IS_32BIT_ADDR) + rnd &= 0xfffffful; + else + rnd &= 0xffffffful; + + return rnd; +} + void arch_pick_mmap_layout(struct mm_struct *mm) { unsigned long random_factor = 0UL; - if (current->flags & PF_RANDOMIZE) { - random_factor = get_random_int(); - random_factor = random_factor << PAGE_SHIFT; - if (TASK_IS_32BIT_ADDR) - random_factor &= 0xfffffful; - else - random_factor &= 0xffffffful; - } + if (current->flags & PF_RANDOMIZE) + random_factor = mmap_rnd(); if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; -- cgit v1.2.3 From ed6322746afb74c2509e2f3a6464182793b16eb9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:47:54 -0700 Subject: powerpc: standardize mmap_rnd() usage In preparation for splitting out ET_DYN ASLR, this refactors the use of mmap_rnd() to be used similarly to arm and x86. (Can mmap ASLR be safely enabled in the legacy mmap case here? Other archs use "mm->mmap_base = TASK_UNMAPPED_BASE + random_factor".) Signed-off-by: Kees Cook Reviewed-by: Ingo Molnar Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/mmap.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index cb8bdbe4972f..1ad2299d795d 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -55,19 +55,18 @@ static inline int mmap_is_legacy(void) static unsigned long mmap_rnd(void) { - unsigned long rnd = 0; + unsigned long rnd; + + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT)); + else + rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT)); - if (current->flags & PF_RANDOMIZE) { - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); - else - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); - } return rnd << PAGE_SHIFT; } -static inline unsigned long mmap_base(void) +static inline unsigned long mmap_base(unsigned long rnd) { unsigned long gap = rlimit(RLIMIT_STACK); @@ -76,7 +75,7 @@ static inline unsigned long mmap_base(void) else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); + return PAGE_ALIGN(TASK_SIZE - gap - rnd); } /* @@ -85,6 +84,11 @@ static inline unsigned long mmap_base(void) */ void arch_pick_mmap_layout(struct mm_struct *mm) { + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) + random_factor = mmap_rnd(); + /* * Fall back to the standard layout if the personality * bit is set, or if the expected stack growth is unlimited: @@ -93,7 +97,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; } else { - 
mm->mmap_base = mmap_base(); + mm->mmap_base = mmap_base(random_factor); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } -- cgit v1.2.3 From 8e89a356feb6f196824a72101861d931a97ac2d2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:47:57 -0700 Subject: s390: standardize mmap_rnd() usage In preparation for splitting out ET_DYN ASLR, this refactors the use of mmap_rnd() to be used similarly to arm and x86, and extracts the checking of PF_RANDOMIZE. Signed-off-by: Kees Cook Acked-by: Martin Schwidefsky Cc: Heiko Carstens Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/mmap.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 179a2c20b01f..db57078075c5 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -62,20 +62,18 @@ static inline int mmap_is_legacy(void) static unsigned long mmap_rnd(void) { - if (!(current->flags & PF_RANDOMIZE)) - return 0; if (is_32bit_task()) return (get_random_int() & 0x7ff) << PAGE_SHIFT; else return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT; } -static unsigned long mmap_base_legacy(void) +static unsigned long mmap_base_legacy(unsigned long rnd) { - return TASK_UNMAPPED_BASE + mmap_rnd(); + return TASK_UNMAPPED_BASE + rnd; } -static inline unsigned long mmap_base(void) +static inline unsigned long mmap_base(unsigned long rnd) { unsigned long gap = rlimit(RLIMIT_STACK); @@ -84,7 +82,7 @@ static inline unsigned long mmap_base(void) else if (gap > MAX_GAP) gap = MAX_GAP; gap &= PAGE_MASK; - return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap; + return STACK_TOP - stack_maxrandom_size() - rnd - gap; } unsigned long @@ -187,7 +185,11 @@ unsigned long randomize_et_dyn(void) if (!is_32bit_task()) /* Align to 4GB */ base &= ~((1UL << 32) - 1); - return base + mmap_rnd(); + + if (current->flags & PF_RANDOMIZE) + base += mmap_rnd(); + + return base; } #ifndef CONFIG_64BIT @@ -198,15 +200,20 @@ unsigned long randomize_et_dyn(void) */ void arch_pick_mmap_layout(struct mm_struct *mm) { + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) + random_factor = mmap_rnd(); + /* * Fall back to the standard layout if the personality * bit is set, or if the expected stack growth is unlimited: */ if (mmap_is_legacy()) { - mm->mmap_base = mmap_base_legacy(); + mm->mmap_base = mmap_base_legacy(random_factor); mm->get_unmapped_area = arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(); + mm->mmap_base = mmap_base(random_factor); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } @@ -273,15 +280,20 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, */ void arch_pick_mmap_layout(struct mm_struct *mm) { + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) + random_factor = mmap_rnd(); + /* * Fall back to the standard layout if the personality * bit is set, or if the expected stack growth is unlimited: */ if (mmap_is_legacy()) { - mm->mmap_base = mmap_base_legacy(); + mm->mmap_base = mmap_base_legacy(random_factor); mm->get_unmapped_area = s390_get_unmapped_area; } else { - mm->mmap_base = mmap_base(); + mm->mmap_base = mmap_base(random_factor); mm->get_unmapped_area = s390_get_unmapped_area_topdown; } } -- cgit v1.2.3 From 2b68f6caeac271620cd2f9362aeaed360e317df0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:48:00 -0700 Subject: mm: expose arch_mmap_rnd when 
available When an architecture fully supports randomizing the ELF load location, a per-arch mmap_rnd() function is used to find a randomized mmap base. In preparation for randomizing the location of ET_DYN binaries separately from mmap, this renames and exports these functions as arch_mmap_rnd(). Additionally introduces CONFIG_ARCH_HAS_ELF_RANDOMIZE for describing this feature on architectures that support it (which is a superset of ARCH_BINFMT_ELF_RANDOMIZE_PIE, since s390 already supports a separated ET_DYN ASLR from mmap ASLR without the ARCH_BINFMT_ELF_RANDOMIZE_PIE logic). Signed-off-by: Kees Cook Cc: Hector Marco-Gisbert Cc: Russell King Reviewed-by: Ingo Molnar Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Alexander Viro Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: "David A. Long" Cc: Andrey Ryabinin Cc: Arun Chandran Cc: Yann Droneaud Cc: Min-Hua Chen Cc: Paul Burton Cc: Alex Smith Cc: Markos Chandras Cc: Vineeth Vijayan Cc: Jeff Bailey Cc: Michael Holzheu Cc: Ben Hutchings Cc: Behan Webster Cc: Ismael Ripoll Cc: Jan-Simon Mller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 7 +++++++ arch/arm/Kconfig | 1 + arch/arm/mm/mmap.c | 4 ++-- arch/arm64/Kconfig | 1 + arch/arm64/mm/mmap.c | 4 ++-- arch/mips/Kconfig | 1 + arch/mips/mm/mmap.c | 4 ++-- arch/powerpc/Kconfig | 1 + arch/powerpc/mm/mmap.c | 4 ++-- arch/s390/Kconfig | 1 + arch/s390/mm/mmap.c | 8 ++++---- arch/x86/Kconfig | 1 + arch/x86/mm/mmap.c | 4 ++-- include/linux/elf-randomize.h | 10 ++++++++++ 14 files changed, 37 insertions(+), 14 deletions(-) create mode 100644 include/linux/elf-randomize.h (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index c88c23f0a1da..474904a8e540 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -491,6 +491,13 @@ config PGTABLE_LEVELS int default 2 +config ARCH_HAS_ELF_RANDOMIZE + bool + help + An architecture supports choosing randomized locations for + stack, mmap, brk, and ET_DYN. 
Defined functions: + - arch_mmap_rnd() + # # ABI hall of shame # diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 696cf3c61e0f..f85200a63a8b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -3,6 +3,7 @@ config ARM default y select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 15a8160096b3..407dc786583a 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -169,7 +169,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } -static unsigned long mmap_rnd(void) +unsigned long arch_mmap_rnd(void) { unsigned long rnd; @@ -184,7 +184,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) - random_factor = mmap_rnd(); + random_factor = arch_mmap_rnd(); if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3f2fba996bc2..7c1dbeb73e8d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2,6 +2,7 @@ config ARM64 def_bool y select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index ba776c01b552..ed177475dd8c 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -47,7 +47,7 @@ static int mmap_is_legacy(void) return sysctl_legacy_va_layout; } -static unsigned long mmap_rnd(void) +unsigned long arch_mmap_rnd(void) { unsigned long rnd; @@ -77,7 +77,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) - random_factor = mmap_rnd(); + random_factor = arch_mmap_rnd(); /* * Fall back to the standard layout if the personality bit is set, or diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a9d112d2a135..688ce274f59d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -24,6 +24,7 @@ config MIPS select HAVE_DEBUG_KMEMLEAK select HAVE_SYSCALL_TRACEPOINTS select ARCH_BINFMT_ELF_RANDOMIZE_PIE + select ARCH_HAS_ELF_RANDOMIZE select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT select RTC_LIB if !MACH_LOONGSON select GENERIC_ATOMIC64 if !64BIT diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 9a4f1f5c1f0e..5c81fdd032c3 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -142,7 +142,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, addr0, len, pgoff, flags, DOWN); } -static unsigned long mmap_rnd(void) +unsigned long arch_mmap_rnd(void) { unsigned long rnd; @@ -161,7 +161,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) - random_factor = mmap_rnd(); + random_factor = arch_mmap_rnd(); if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 91ad76f30d18..fc5fffbb331b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -89,6 +89,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_SERIO select BINFMT_ELF select ARCH_BINFMT_ELF_RANDOMIZE_PIE + select ARCH_HAS_ELF_RANDOMIZE select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM 
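Before the per-architecture mmap.c diffs below, note that the shape of the change is identical everywhere: the entropy source becomes a plain exported function, and the PF_RANDOMIZE policy check stays with the caller. Condensed to a skeleton (RND_BITS stands in for each architecture's entropy width; this is not any one arch's actual code):

unsigned long arch_mmap_rnd(void)
{
        /* Entropy only; no policy decision is made here. */
        return ((unsigned long)get_random_int() % (1 << RND_BITS)) << PAGE_SHIFT;
}

void arch_pick_mmap_layout(struct mm_struct *mm)
{
        unsigned long random_factor = 0UL;

        if (current->flags & PF_RANDOMIZE)      /* policy lives here */
                random_factor = arch_mmap_rnd();

        /* ... pick the legacy or topdown layout using random_factor ... */
}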
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 1ad2299d795d..0f0502e12f6c 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -53,7 +53,7 @@ static inline int mmap_is_legacy(void) return sysctl_legacy_va_layout; } -static unsigned long mmap_rnd(void) +unsigned long arch_mmap_rnd(void) { unsigned long rnd; @@ -87,7 +87,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) - random_factor = mmap_rnd(); + random_factor = arch_mmap_rnd(); /* * Fall back to the standard layout if the personality diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f6aebcb7a0f8..ac2b75d74cd2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -65,6 +65,7 @@ config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index db57078075c5..a94504d99c47 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -60,7 +60,7 @@ static inline int mmap_is_legacy(void) return sysctl_legacy_va_layout; } -static unsigned long mmap_rnd(void) +unsigned long arch_mmap_rnd(void) { if (is_32bit_task()) return (get_random_int() & 0x7ff) << PAGE_SHIFT; @@ -187,7 +187,7 @@ unsigned long randomize_et_dyn(void) base &= ~((1UL << 32) - 1); if (current->flags & PF_RANDOMIZE) - base += mmap_rnd(); + base += arch_mmap_rnd(); return base; } @@ -203,7 +203,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) - random_factor = mmap_rnd(); + random_factor = arch_mmap_rnd(); /* * Fall back to the standard layout if the personality @@ -283,7 +283,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) - random_factor = mmap_rnd(); + random_factor = arch_mmap_rnd(); /* * Fall back to the standard layout if the personality diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0f948cefaeb1..782ddbbc1c9a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -88,6 +88,7 @@ config X86 select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP select HAVE_USER_RETURN_NOTIFIER select ARCH_BINFMT_ELF_RANDOMIZE_PIE + select ARCH_HAS_ELF_RANDOMIZE select HAVE_ARCH_JUMP_LABEL select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select SPARSE_IRQ diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index ebfa52030d5c..9d518d693b4b 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -65,7 +65,7 @@ static int mmap_is_legacy(void) return sysctl_legacy_va_layout; } -static unsigned long mmap_rnd(void) +unsigned long arch_mmap_rnd(void) { unsigned long rnd; @@ -114,7 +114,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) - random_factor = mmap_rnd(); + random_factor = arch_mmap_rnd(); mm->mmap_legacy_base = mmap_legacy_base(random_factor); diff --git a/include/linux/elf-randomize.h b/include/linux/elf-randomize.h new file mode 100644 index 000000000000..7a4eda02d2b1 --- /dev/null +++ b/include/linux/elf-randomize.h @@ -0,0 +1,10 @@ +#ifndef _ELF_RANDOMIZE_H +#define _ELF_RANDOMIZE_H + +#ifndef CONFIG_ARCH_HAS_ELF_RANDOMIZE +static inline unsigned long arch_mmap_rnd(void) { return 0; } +#else +extern unsigned long arch_mmap_rnd(void); +#endif + +#endif -- cgit v1.2.3 From c6f5b001e65cdac592b65a08c5d2dd179cfba568 Mon 
Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:48:04 -0700 Subject: s390: redefine randomize_et_dyn for ELF_ET_DYN_BASE In preparation for moving ET_DYN randomization into the ELF loader (which requires a static ELF_ET_DYN_BASE), this redefines s390's existing ET_DYN randomization in a call to arch_mmap_rnd(). This refactoring results in the same ET_DYN randomization on s390. Signed-off-by: Kees Cook Acked-by: Martin Schwidefsky Cc: Heiko Carstens Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/elf.h | 8 +++++--- arch/s390/mm/mmap.c | 11 ++--------- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index c9c875d9ed31..f8db4781a4c2 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -161,10 +161,12 @@ extern unsigned int vdso_enabled; /* This is the location that an ET_DYN program is loaded if exec'ed. Typical use of this is to invoke "./ld.so someprog" to test out a new version of the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - + that it will "exec", and that there is sufficient room for the brk. 64-bit + tasks are aligned to 4GB. */ extern unsigned long randomize_et_dyn(void); -#define ELF_ET_DYN_BASE randomize_et_dyn() +#define ELF_ET_DYN_BASE (randomize_et_dyn() + (is_32bit_task() ? \ + (STACK_TOP / 3 * 2) : \ + (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index a94504d99c47..8c11536f972d 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -179,17 +179,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, unsigned long randomize_et_dyn(void) { - unsigned long base; - - base = STACK_TOP / 3 * 2; - if (!is_32bit_task()) - /* Align to 4GB */ - base &= ~((1UL << 32) - 1); - if (current->flags & PF_RANDOMIZE) - base += arch_mmap_rnd(); + return arch_mmap_rnd(); - return base; + return 0UL; } #ifndef CONFIG_64BIT -- cgit v1.2.3 From d1fd836dcf00d2028c700c7e44d2c23404062c90 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:48:07 -0700 Subject: mm: split ET_DYN ASLR from mmap ASLR This fixes the "offset2lib" weakness in ASLR for arm, arm64, mips, powerpc, and x86. The problem is that if there is a leak of ASLR from the executable (ET_DYN), it means a leak of shared library offset as well (mmap), and vice versa. Further details and a PoC of this attack is available here: http://cybersecurity.upv.es/attacks/offset2lib/offset2lib.html With this patch, a PIE linked executable (ET_DYN) has its own ASLR region: $ ./show_mmaps_pie 54859ccd6000-54859ccd7000 r-xp ... /tmp/show_mmaps_pie 54859ced6000-54859ced7000 r--p ... /tmp/show_mmaps_pie 54859ced7000-54859ced8000 rw-p ... /tmp/show_mmaps_pie 7f75be764000-7f75be91f000 r-xp ... /lib/x86_64-linux-gnu/libc.so.6 7f75be91f000-7f75beb1f000 ---p ... /lib/x86_64-linux-gnu/libc.so.6 7f75beb1f000-7f75beb23000 r--p ... /lib/x86_64-linux-gnu/libc.so.6 7f75beb23000-7f75beb25000 rw-p ... /lib/x86_64-linux-gnu/libc.so.6 7f75beb25000-7f75beb2a000 rw-p ... 7f75beb2a000-7f75beb4d000 r-xp ... /lib64/ld-linux-x86-64.so.2 7f75bed45000-7f75bed46000 rw-p ... 7f75bed46000-7f75bed47000 r-xp ... 7f75bed47000-7f75bed4c000 rw-p ... 7f75bed4c000-7f75bed4d000 r--p ... 
/lib64/ld-linux-x86-64.so.2 7f75bed4d000-7f75bed4e000 rw-p ... /lib64/ld-linux-x86-64.so.2 7f75bed4e000-7f75bed4f000 rw-p ... 7fffb3741000-7fffb3762000 rw-p ... [stack] 7fffb377b000-7fffb377d000 r--p ... [vvar] 7fffb377d000-7fffb377f000 r-xp ... [vdso] The change is to add a call the newly created arch_mmap_rnd() into the ELF loader for handling ET_DYN ASLR in a separate region from mmap ASLR, as was already done on s390. Removes CONFIG_BINFMT_ELF_RANDOMIZE_PIE, which is no longer needed. Signed-off-by: Kees Cook Reported-by: Hector Marco-Gisbert Cc: Russell King Reviewed-by: Ingo Molnar Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Alexander Viro Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: "David A. Long" Cc: Andrey Ryabinin Cc: Arun Chandran Cc: Yann Droneaud Cc: Min-Hua Chen Cc: Paul Burton Cc: Alex Smith Cc: Markos Chandras Cc: Vineeth Vijayan Cc: Jeff Bailey Cc: Michael Holzheu Cc: Ben Hutchings Cc: Behan Webster Cc: Ismael Ripoll Cc: Jan-Simon Mller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 1 - arch/arm64/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/s390/include/asm/elf.h | 5 ++--- arch/s390/mm/mmap.c | 8 -------- arch/x86/Kconfig | 1 - fs/Kconfig.binfmt | 3 --- fs/binfmt_elf.c | 18 ++++-------------- 9 files changed, 6 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f85200a63a8b..4b62f4caf0ce 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1,7 +1,6 @@ config ARM bool default y - select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7c1dbeb73e8d..34f487d5d84e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,5 @@ config ARM64 def_bool y - select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 688ce274f59d..a326c4cb8cf0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -23,7 +23,6 @@ config MIPS select HAVE_KRETPROBES select HAVE_DEBUG_KMEMLEAK select HAVE_SYSCALL_TRACEPOINTS - select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ELF_RANDOMIZE select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT select RTC_LIB if !MACH_LOONGSON diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fc5fffbb331b..e99014adf017 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -88,7 +88,6 @@ config PPC select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select BINFMT_ELF - select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ELF_RANDOMIZE select OF select OF_EARLY_FLATTREE diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index f8db4781a4c2..ff662155b2c4 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -163,10 +163,9 @@ extern unsigned int vdso_enabled; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. 64-bit tasks are aligned to 4GB. */ -extern unsigned long randomize_et_dyn(void); -#define ELF_ET_DYN_BASE (randomize_et_dyn() + (is_32bit_task() ? \ +#define ELF_ET_DYN_BASE (is_32bit_task() ? 
\ (STACK_TOP / 3 * 2) : \ - (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))) + (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 8c11536f972d..bb3367c5cb0b 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -177,14 +177,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } -unsigned long randomize_et_dyn(void) -{ - if (current->flags & PF_RANDOMIZE) - return arch_mmap_rnd(); - - return 0UL; -} - #ifndef CONFIG_64BIT /* diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 782ddbbc1c9a..1f7f185934a5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -87,7 +87,6 @@ config X86 select HAVE_ARCH_KMEMCHECK select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP select HAVE_USER_RETURN_NOTIFIER - select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ELF_RANDOMIZE select HAVE_ARCH_JUMP_LABEL select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 270c48148f79..2d0cbbd14cfc 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF bool depends on COMPAT && BINFMT_ELF -config ARCH_BINFMT_ELF_RANDOMIZE_PIE - bool - config ARCH_BINFMT_ELF_STATE bool diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d925f55e4857..b20c05477e90 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -910,21 +911,10 @@ static int load_elf_binary(struct linux_binprm *bprm) * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */ -#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE - /* Memory randomization might have been switched off - * in runtime via sysctl or explicit setting of - * personality flags. - * If that is the case, retain the original non-zero - * load_bias value in order to establish proper - * non-randomized mappings. - */ + load_bias = ELF_ET_DYN_BASE - vaddr; if (current->flags & PF_RANDOMIZE) - load_bias = 0; - else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#else - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -#endif + load_bias += arch_mmap_rnd(); + load_bias = ELF_PAGESTART(load_bias); total_size = total_mapping_size(elf_phdata, loc->elf_ex.e_phnum); if (!total_size) { -- cgit v1.2.3 From 204db6ed17743000691d930368a5abd6ea541c58 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:48:12 -0700 Subject: mm: fold arch_randomize_brk into ARCH_HAS_ELF_RANDOMIZE The arch_randomize_brk() function is used on several architectures, even those that don't support ET_DYN ASLR. To avoid bulky extern/#define tricks, consolidate the support under CONFIG_ARCH_HAS_ELF_RANDOMIZE for the architectures that support it, while still handling CONFIG_COMPAT_BRK. Signed-off-by: Kees Cook Cc: Hector Marco-Gisbert Cc: Russell King Reviewed-by: Ingo Molnar Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Alexander Viro Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: "David A. 
Long" Cc: Andrey Ryabinin Cc: Arun Chandran Cc: Yann Droneaud Cc: Min-Hua Chen Cc: Paul Burton Cc: Alex Smith Cc: Markos Chandras Cc: Vineeth Vijayan Cc: Jeff Bailey Cc: Michael Holzheu Cc: Ben Hutchings Cc: Behan Webster Cc: Ismael Ripoll Cc: Jan-Simon Mller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 1 + arch/arm/include/asm/elf.h | 4 ---- arch/arm64/include/asm/elf.h | 4 ---- arch/mips/include/asm/elf.h | 4 ---- arch/powerpc/include/asm/elf.h | 4 ---- arch/s390/include/asm/elf.h | 3 --- arch/x86/include/asm/elf.h | 3 --- fs/binfmt_elf.c | 4 +--- include/linux/elf-randomize.h | 12 ++++++++++++ 9 files changed, 14 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 474904a8e540..e1068987bad1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -497,6 +497,7 @@ config ARCH_HAS_ELF_RANDOMIZE An architecture supports choosing randomized locations for stack, mmap, brk, and ET_DYN. Defined functions: - arch_mmap_rnd() + - arch_randomize_brk() # # ABI hall of shame diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index afb9cafd3786..c1ff8ab12914 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -125,10 +125,6 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); extern void elf_set_personality(const struct elf32_hdr *); #define SET_PERSONALITY(ex) elf_set_personality(&(ex)) -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - #ifdef CONFIG_MMU #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index f724db00b235..faad6df49e5b 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -156,10 +156,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) #endif -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - #ifdef CONFIG_COMPAT #ifdef __AARCH64EB__ diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index 535f196ffe02..31d747d46a23 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -410,10 +410,6 @@ struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - struct arch_elf_state { int fp_abi; int interp_fp_abi; diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 57d289acb803..ee46ffef608e 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -128,10 +128,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, (0x7ff >> (PAGE_SHIFT - 12)) : \ (0x3ffff >> (PAGE_SHIFT - 12))) -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - - #ifdef CONFIG_SPU_BASE /* Notes used in ET_CORE. Note name is "SPU//". 
*/ #define NT_SPU 1 diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index ff662155b2c4..a5c4978462c1 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -226,9 +226,6 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 int arch_setup_additional_pages(struct linux_binprm *, int); -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); #endif diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 935588d95c82..f161c189c27b 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -339,9 +339,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - /* * True on X86_32 or when emulating IA32 on X86_64 */ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b20c05477e90..241ef68d2893 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1050,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm) current->mm->end_data = end_data; current->mm->start_stack = bprm->p; -#ifdef arch_randomize_brk if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); -#ifdef CONFIG_COMPAT_BRK +#ifdef compat_brk_randomized current->brk_randomized = 1; #endif } -#endif if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, diff --git a/include/linux/elf-randomize.h b/include/linux/elf-randomize.h index 7a4eda02d2b1..b5f0bda9472e 100644 --- a/include/linux/elf-randomize.h +++ b/include/linux/elf-randomize.h @@ -1,10 +1,22 @@ #ifndef _ELF_RANDOMIZE_H #define _ELF_RANDOMIZE_H +struct mm_struct; + #ifndef CONFIG_ARCH_HAS_ELF_RANDOMIZE static inline unsigned long arch_mmap_rnd(void) { return 0; } +# if defined(arch_randomize_brk) && defined(CONFIG_COMPAT_BRK) +# define compat_brk_randomized +# endif +# ifndef arch_randomize_brk +# define arch_randomize_brk(mm) (mm->brk) +# endif #else extern unsigned long arch_mmap_rnd(void); +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +# ifdef CONFIG_COMPAT_BRK +# define compat_brk_randomized +# endif #endif #endif -- cgit v1.2.3 From 4a20799d11f64e6b8725cacc7619b1ae1dbf9acd Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 14 Apr 2015 15:48:27 -0700 Subject: mm: move memtest under mm Memtest is a simple feature which fills the memory with a given set of patterns and validates memory contents; if bad memory regions are detected, it reserves them via the memblock API. Since the memblock API is widely used by other architectures, this feature can be enabled outside of the x86 world. This patch set promotes memtest to live under the generic mm umbrella and enables the memtest feature for arm/arm64. It was reported that this patch set was useful for tracking down an issue with some errant DMA on an arm64 platform. This patch (of 6): There is nothing platform dependent in the core memtest code, so other platforms might benefit from this feature too. [linux@roeck-us.net: MEMTEST depends on MEMBLOCK] Signed-off-by: Vladimir Murzin Acked-by: Will Deacon Tested-by: Mark Rutland Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H.
Peter Anvin" Cc: Catalin Marinas Cc: Russell King Cc: Paul Bolle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 11 ----- arch/x86/include/asm/e820.h | 8 --- arch/x86/mm/Makefile | 2 - arch/x86/mm/memtest.c | 118 -------------------------------------------- include/linux/memblock.h | 8 +++ lib/Kconfig.debug | 12 +++++ mm/Makefile | 1 + mm/memtest.c | 118 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 139 insertions(+), 139 deletions(-) delete mode 100644 arch/x86/mm/memtest.c create mode 100644 mm/memtest.c (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1f7f185934a5..d43e7e1c784b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -721,17 +721,6 @@ endif #HYPERVISOR_GUEST config NO_BOOTMEM def_bool y -config MEMTEST - bool "Memtest" - ---help--- - This option adds a kernel parameter 'memtest', which allows memtest - to be set. - memtest=0, mean disabled; -- default - memtest=1, mean do 1 test pattern; - ... - memtest=4, mean do 4 test patterns. - If you are unsure how to answer this question, answer N. - source "arch/x86/Kconfig.cpu" config HPET_TIMER diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 779c2efe2e97..3ab0537872fb 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -40,14 +40,6 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn) } #endif -#ifdef CONFIG_MEMTEST -extern void early_memtest(unsigned long start, unsigned long end); -#else -static inline void early_memtest(unsigned long start, unsigned long end) -{ -} -#endif - extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); extern u64 early_reserve_e820(u64 sizet, u64 align); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index c4cc74006c61..a482d105172b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -32,6 +32,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_MEMTEST) += memtest.o - obj-$(CONFIG_X86_INTEL_MPX) += mpx.o diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c deleted file mode 100644 index 1e9da795767a..000000000000 --- a/arch/x86/mm/memtest.c +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static u64 patterns[] __initdata = { - /* The first entry has to be 0 to leave memtest with zeroed memory */ - 0, - 0xffffffffffffffffULL, - 0x5555555555555555ULL, - 0xaaaaaaaaaaaaaaaaULL, - 0x1111111111111111ULL, - 0x2222222222222222ULL, - 0x4444444444444444ULL, - 0x8888888888888888ULL, - 0x3333333333333333ULL, - 0x6666666666666666ULL, - 0x9999999999999999ULL, - 0xccccccccccccccccULL, - 0x7777777777777777ULL, - 0xbbbbbbbbbbbbbbbbULL, - 0xddddddddddddddddULL, - 0xeeeeeeeeeeeeeeeeULL, - 0x7a6c7258554e494cULL, /* yeah ;-) */ -}; - -static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) -{ - printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", - (unsigned long long) pattern, - (unsigned long long) start_bad, - (unsigned long long) end_bad); - memblock_reserve(start_bad, end_bad - start_bad); -} - -static void __init memtest(u64 pattern, u64 start_phys, u64 size) -{ - u64 *p, *start, *end; - u64 start_bad, last_bad; - u64 start_phys_aligned; - const size_t incr = sizeof(pattern); - - start_phys_aligned = ALIGN(start_phys, incr); - start = __va(start_phys_aligned); - end = start + (size - 
(start_phys_aligned - start_phys)) / incr; - start_bad = 0; - last_bad = 0; - - for (p = start; p < end; p++) - *p = pattern; - - for (p = start; p < end; p++, start_phys_aligned += incr) { - if (*p == pattern) - continue; - if (start_phys_aligned == last_bad + incr) { - last_bad += incr; - continue; - } - if (start_bad) - reserve_bad_mem(pattern, start_bad, last_bad + incr); - start_bad = last_bad = start_phys_aligned; - } - if (start_bad) - reserve_bad_mem(pattern, start_bad, last_bad + incr); -} - -static void __init do_one_pass(u64 pattern, u64 start, u64 end) -{ - u64 i; - phys_addr_t this_start, this_end; - - for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { - this_start = clamp_t(phys_addr_t, this_start, start, end); - this_end = clamp_t(phys_addr_t, this_end, start, end); - if (this_start < this_end) { - printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", - (unsigned long long)this_start, - (unsigned long long)this_end, - (unsigned long long)cpu_to_be64(pattern)); - memtest(pattern, this_start, this_end - this_start); - } - } -} - -/* default is disabled */ -static int memtest_pattern __initdata; - -static int __init parse_memtest(char *arg) -{ - if (arg) - memtest_pattern = simple_strtoul(arg, NULL, 0); - else - memtest_pattern = ARRAY_SIZE(patterns); - - return 0; -} - -early_param("memtest", parse_memtest); - -void __init early_memtest(unsigned long start, unsigned long end) -{ - unsigned int i; - unsigned int idx = 0; - - if (!memtest_pattern) - return; - - printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); - for (i = memtest_pattern-1; i < UINT_MAX; --i) { - idx = i % ARRAY_SIZE(patterns); - do_one_pass(patterns[idx], start, end); - } -} diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e8cc45307f8f..6724cb020f5e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -365,6 +365,14 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo #define __initdata_memblock #endif +#ifdef CONFIG_MEMTEST +extern void early_memtest(unsigned long start, unsigned long end); +#else +static inline void early_memtest(unsigned long start, unsigned long end) +{ +} +#endif + #else static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 36b6fa88ce5b..5c7a3183423b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1745,6 +1745,18 @@ config TEST_UDELAY If unsure, say N. +config MEMTEST + bool "Memtest" + depends on HAVE_MEMBLOCK + ---help--- + This option adds a kernel parameter 'memtest', which allows memtest + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=4, mean do 4 test patterns. + If you are unsure how to answer this question, answer N. 
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"

diff --git a/mm/Makefile b/mm/Makefile
index 668a9bb82be4..98c4eaeabdcb 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_KMEMCHECK) += kmemcheck.o
 obj-$(CONFIG_KASAN)	+= kasan/
 obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
+obj-$(CONFIG_MEMTEST)		+= memtest.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE)	+= huge_memory.o

diff --git a/mm/memtest.c b/mm/memtest.c
new file mode 100644
index 000000000000..1e9da795767a
--- /dev/null
+++ b/mm/memtest.c
@@ -0,0 +1,118 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static u64 patterns[] __initdata = {
+	/* The first entry has to be 0 to leave memtest with zeroed memory */
+	0,
+	0xffffffffffffffffULL,
+	0x5555555555555555ULL,
+	0xaaaaaaaaaaaaaaaaULL,
+	0x1111111111111111ULL,
+	0x2222222222222222ULL,
+	0x4444444444444444ULL,
+	0x8888888888888888ULL,
+	0x3333333333333333ULL,
+	0x6666666666666666ULL,
+	0x9999999999999999ULL,
+	0xccccccccccccccccULL,
+	0x7777777777777777ULL,
+	0xbbbbbbbbbbbbbbbbULL,
+	0xddddddddddddddddULL,
+	0xeeeeeeeeeeeeeeeeULL,
+	0x7a6c7258554e494cULL, /* yeah ;-) */
+};
+
+static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
+{
+	printk(KERN_INFO "  %016llx bad mem addr %010llx - %010llx reserved\n",
+	       (unsigned long long) pattern,
+	       (unsigned long long) start_bad,
+	       (unsigned long long) end_bad);
+	memblock_reserve(start_bad, end_bad - start_bad);
+}
+
+static void __init memtest(u64 pattern, u64 start_phys, u64 size)
+{
+	u64 *p, *start, *end;
+	u64 start_bad, last_bad;
+	u64 start_phys_aligned;
+	const size_t incr = sizeof(pattern);
+
+	start_phys_aligned = ALIGN(start_phys, incr);
+	start = __va(start_phys_aligned);
+	end = start + (size - (start_phys_aligned - start_phys)) / incr;
+	start_bad = 0;
+	last_bad = 0;
+
+	for (p = start; p < end; p++)
+		*p = pattern;
+
+	for (p = start; p < end; p++, start_phys_aligned += incr) {
+		if (*p == pattern)
+			continue;
+		if (start_phys_aligned == last_bad + incr) {
+			last_bad += incr;
+			continue;
+		}
+		if (start_bad)
+			reserve_bad_mem(pattern, start_bad, last_bad + incr);
+		start_bad = last_bad = start_phys_aligned;
+	}
+	if (start_bad)
+		reserve_bad_mem(pattern, start_bad, last_bad + incr);
+}
+
+static void __init do_one_pass(u64 pattern, u64 start, u64 end)
+{
+	u64 i;
+	phys_addr_t this_start, this_end;
+
+	for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) {
+		this_start = clamp_t(phys_addr_t, this_start, start, end);
+		this_end = clamp_t(phys_addr_t, this_end, start, end);
+		if (this_start < this_end) {
+			printk(KERN_INFO "  %010llx - %010llx pattern %016llx\n",
+			       (unsigned long long)this_start,
+			       (unsigned long long)this_end,
+			       (unsigned long long)cpu_to_be64(pattern));
+			memtest(pattern, this_start, this_end - this_start);
+		}
+	}
+}
+
+/* default is disabled */
+static int memtest_pattern __initdata;
+
+static int __init parse_memtest(char *arg)
+{
+	if (arg)
+		memtest_pattern = simple_strtoul(arg, NULL, 0);
+	else
+		memtest_pattern = ARRAY_SIZE(patterns);
+
+	return 0;
+}
+
+early_param("memtest", parse_memtest);
+
+void __init early_memtest(unsigned long start, unsigned long end)
+{
+	unsigned int i;
+	unsigned int idx = 0;
+
+	if (!memtest_pattern)
+		return;
+
+	printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
+	for (i = memtest_pattern-1; i < UINT_MAX; --i) {
+		idx = i % ARRAY_SIZE(patterns);
+		do_one_pass(patterns[idx], start, end);
+	}
+}
-- cgit v1.2.3

From 36dd9086cb31613ace45e94c18a17241d3d0aac4 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin
Date: Tue, 14 Apr 2015 15:48:33 -0700
Subject: arm64: add support for memtest

Add support for memtest command line option.

Signed-off-by: Vladimir Murzin
Acked-by: Will Deacon
Tested-by: Mark Rutland
Cc: Catalin Marinas
Cc: Russell King
Cc: "H. Peter Anvin"
Cc: Ingo Molnar
Cc: Thomas Gleixner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/mm/init.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ae85da6307bb..597831bdddf3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -190,6 +190,8 @@ void __init bootmem_init(void)
 	min = PFN_UP(memblock_start_of_DRAM());
 	max = PFN_DOWN(memblock_end_of_DRAM());
 
+	early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
+
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(), so must be
 	 * done after the fixed reservations.
-- cgit v1.2.3

From d30eae473360aea25e0584d4fbf6a70417d89784 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin
Date: Tue, 14 Apr 2015 15:48:37 -0700
Subject: arm: add support for memtest

Add support for memtest command line option.

Signed-off-by: Vladimir Murzin
Acked-by: Will Deacon
Cc: "H. Peter Anvin"
Cc: Catalin Marinas
Cc: Ingo Molnar
Cc: Mark Rutland
Cc: Russell King
Cc: Thomas Gleixner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm/mm/init.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 1609b022a72f..3d0e9aed4b40 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -335,6 +335,9 @@ void __init bootmem_init(void)
 
 	find_limits(&min, &max_low, &max_high);
 
+	early_memtest((phys_addr_t)min << PAGE_SHIFT,
+		      (phys_addr_t)max_low << PAGE_SHIFT);
+
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(),
 	 * so must be done after the fixed reservations
-- cgit v1.2.3
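
The pass implemented by memtest() above is compact enough to exercise outside the kernel: fill a region with a pattern, read every word back, and coalesce consecutive failing words into ranges, which the kernel then hands to memblock_reserve(). Below is a minimal user-space sketch of that loop; it is illustrative only -- memtest_pass(), report_bad_range(), NWORDS and the shortened pattern table are made-up stand-ins, and it scans an ordinary malloc'd buffer rather than physical memory, so on healthy hardware it should report nothing.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define NWORDS (1UL << 20)	/* arbitrary 8 MiB demo buffer */

/* Stand-in for memblock_reserve(): just report the bad byte range. */
static void report_bad_range(uint64_t pattern, size_t start_bad, size_t end_bad)
{
	printf("  %016llx bad mem offset %#zx - %#zx\n",
	       (unsigned long long)pattern, start_bad, end_bad);
}

/* One pass of the algorithm above: write, verify, merge adjacent failures. */
static void memtest_pass(uint64_t pattern, uint64_t *start, size_t nwords)
{
	size_t off, start_bad = 0, last_bad = 0;
	int in_bad = 0;

	for (size_t i = 0; i < nwords; i++)	/* write phase */
		start[i] = pattern;

	for (size_t i = 0; i < nwords; i++) {	/* verify phase */
		off = i * sizeof(*start);
		if (start[i] == pattern)
			continue;
		if (in_bad && off == last_bad + sizeof(*start)) {
			last_bad = off;		/* extend current bad range */
			continue;
		}
		if (in_bad)			/* close the previous range */
			report_bad_range(pattern, start_bad,
					 last_bad + sizeof(*start));
		start_bad = last_bad = off;
		in_bad = 1;
	}
	if (in_bad)
		report_bad_range(pattern, start_bad, last_bad + sizeof(*start));
}

int main(void)
{
	/* Arbitrary subset of the kernel's pattern table. */
	const uint64_t patterns[] = {
		0, 0xffffffffffffffffULL,
		0x5555555555555555ULL, 0xaaaaaaaaaaaaaaaaULL,
	};
	uint64_t *buf = malloc(NWORDS * sizeof(*buf));

	if (!buf)
		return 1;
	for (size_t i = 0; i < sizeof(patterns) / sizeof(patterns[0]); i++)
		memtest_pass(patterns[i], buf, NWORDS);
	free(buf);
	return 0;
}

With CONFIG_MEMTEST enabled, the kernel drives the same loop from the boot command line: memtest=4, for example, runs the first four patterns over every free memblock range before the page allocator takes the memory over, which is what the arm and arm64 patches above wire up by calling early_memtest() from bootmem_init().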