diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-16 09:26:42 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-16 09:26:42 -0800 |
commit | de399813b521ea7e38bbfb5e5b620b5e202e5783 (patch) | |
tree | ceb8302f9d6a7a4f2e25b64c5dc42c1fb80b435b /arch/powerpc/mm/hugetlbpage.c | |
parent | 57ca04ab440168e101da746ef9edd1ec583b7214 (diff) | |
parent | c6f6634721c871bfab4235e1cbcad208d3063798 (diff) |
Merge tag 'powerpc-4.10-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"Highlights include:
- Support for the kexec_file_load() syscall, which is a prereq for
secure and trusted boot.
- Prevent kernel execution of userspace on P9 Radix (similar to
SMEP/PXN).
- Sort the exception tables at build time, to save time at boot, and
store them as relative offsets to save space in the kernel image &
memory.
- Allow building the kernel with thin archives, which should allow us
to build an allyesconfig once some other fixes land.
- Build fixes to allow us to correctly rebuild when changing the
kernel endian from big to little or vice versa.
- Plumbing so that we can avoid doing a full mm TLB flush on P9
Radix.
- Initial stack protector support (-fstack-protector).
- Support for dumping the radix (aka. Linux) and hash page tables via
debugfs.
- Fix an oops in cxl coredump generation when cxl_get_fd() is used.
- Freescale updates from Scott: "Highlights include 8xx hugepage
support, qbman fixes/cleanup, device tree updates, and some misc
cleanup."
- Many and varied fixes and minor enhancements as always.
Thanks to:
Alexey Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Anshuman
Khandual, Anton Blanchard, Balbir Singh, Bartlomiej Zolnierkiewicz,
Christophe Jaillet, Christophe Leroy, Denis Kirjanov, Elimar
Riesebieter, Frederic Barrat, Gautham R. Shenoy, Geliang Tang, Geoff
Levand, Jack Miller, Johan Hovold, Lars-Peter Clausen, Libin,
Madhavan Srinivasan, Michael Neuling, Nathan Fontenot, Naveen N.
Rao, Nicholas Piggin, Pan Xinhui, Peter Senna Tschudin, Rashmica
Gupta, Rui Teng, Russell Currey, Scott Wood, Simon Guo, Suraj
Jitindar Singh, Thiago Jung Bauermann, Tobias Klauser, Vaibhav Jain"
[ And thanks to Michael, who took time off from a new baby to get this
pull request done. - Linus ]
* tag 'powerpc-4.10-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (174 commits)
powerpc/fsl/dts: add FMan node for t1042d4rdb
powerpc/fsl/dts: add sg_2500_aqr105_phy4 alias on t1024rdb
powerpc/fsl/dts: add QMan and BMan nodes on t1024
powerpc/fsl/dts: add QMan and BMan nodes on t1023
soc/fsl/qman: test: use DEFINE_SPINLOCK()
powerpc/fsl-lbc: use DEFINE_SPINLOCK()
powerpc/8xx: Implement support of hugepages
powerpc: get hugetlbpage handling more generic
powerpc: port 64 bits pgtable_cache to 32 bits
powerpc/boot: Request no dynamic linker for boot wrapper
soc/fsl/bman: Use resource_size instead of computation
soc/fsl/qe: use builtin_platform_driver
powerpc/fsl_pmc: use builtin_platform_driver
powerpc/83xx/suspend: use builtin_platform_driver
powerpc/ftrace: Fix the comments for ftrace_modify_code
powerpc/perf: macros for power9 format encoding
powerpc/perf: power9 raw event format encoding
powerpc/perf: update attribute_group data structure
powerpc/perf: factor out the event format field
powerpc/mm/iommu, vfio/spapr: Put pages on VFIO container shutdown
...
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 216 |
1 files changed, 96 insertions, 120 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a5d3ecdabc44..289df38fb7e0 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -26,6 +26,8 @@ #ifdef CONFIG_HUGETLB_PAGE #define PAGE_SHIFT_64K 16 +#define PAGE_SHIFT_512K 19 +#define PAGE_SHIFT_8M 23 #define PAGE_SHIFT_16M 24 #define PAGE_SHIFT_16G 34 @@ -38,7 +40,7 @@ unsigned int HPAGE_SHIFT; * implementations may have more than one gpage size, so we need multiple * arrays */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define MAX_NUMBER_GPAGES 128 struct psize_gpages { u64 gpage_list[MAX_NUMBER_GPAGES]; @@ -64,14 +66,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, { struct kmem_cache *cachep; pte_t *new; - -#ifdef CONFIG_PPC_FSL_BOOK3E int i; - int num_hugepd = 1 << (pshift - pdshift); - cachep = hugepte_cache; -#else - cachep = PGT_CACHE(pdshift - pshift); -#endif + int num_hugepd; + + if (pshift >= pdshift) { + cachep = hugepte_cache; + num_hugepd = 1 << (pshift - pdshift); + } else { + cachep = PGT_CACHE(pdshift - pshift); + num_hugepd = 1; + } new = kmem_cache_zalloc(cachep, GFP_KERNEL); @@ -89,7 +93,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, smp_wmb(); spin_lock(&mm->page_table_lock); -#ifdef CONFIG_PPC_FSL_BOOK3E + /* * We have multiple higher-level entries that point to the same * actual pte location. Fill in each as we go and backtrack on error. @@ -100,8 +104,18 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (unlikely(!hugepd_none(*hpdp))) break; else +#ifdef CONFIG_PPC_BOOK3S_64 + hpdp->pd = __pa(new) | + (shift_to_mmu_psize(pshift) << 2); +#elif defined(CONFIG_PPC_8xx) + hpdp->pd = __pa(new) | + (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : + _PMD_PAGE_512K) | + _PMD_PRESENT; +#else /* We use the old format for PPC_FSL_BOOK3E */ hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; +#endif } /* If we bailed from the for loop early, an error occurred, clean up */ if (i < num_hugepd) { @@ -109,17 +123,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, hpdp->pd = 0; kmem_cache_free(cachep, new); } -#else - if (!hugepd_none(*hpdp)) - kmem_cache_free(cachep, new); - else { -#ifdef CONFIG_PPC_BOOK3S_64 - hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2); -#else - hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; -#endif - } -#endif spin_unlock(&mm->page_table_lock); return 0; } @@ -128,7 +131,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, * These macros define how to determine which level of the page table holds * the hpdp. */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_PGD_SHIFT PGDIR_SHIFT #define HUGEPD_PUD_SHIFT PUD_SHIFT #else @@ -136,7 +139,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, #define HUGEPD_PUD_SHIFT PMD_SHIFT #endif -#ifdef CONFIG_PPC_BOOK3S_64 /* * At this point we do the placement change only for BOOK3S 64. This would * possibly work on other subarchs. @@ -153,6 +155,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz addr &= ~(sz-1); pg = pgd_offset(mm, addr); +#ifdef CONFIG_PPC_BOOK3S_64 if (pshift == PGDIR_SHIFT) /* 16GB huge page */ return (pte_t *) pg; @@ -178,32 +181,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz hpdp = (hugepd_t *)pm; } } - if (!hpdp) - return NULL; - - BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); - - if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) - return NULL; - - return hugepte_offset(*hpdp, addr, pdshift); -} - #else - -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) -{ - pgd_t *pg; - pud_t *pu; - pmd_t *pm; - hugepd_t *hpdp = NULL; - unsigned pshift = __ffs(sz); - unsigned pdshift = PGDIR_SHIFT; - - addr &= ~(sz-1); - - pg = pgd_offset(mm, addr); - if (pshift >= HUGEPD_PGD_SHIFT) { hpdp = (hugepd_t *)pg; } else { @@ -217,7 +195,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz hpdp = (hugepd_t *)pm; } } - +#endif if (!hpdp) return NULL; @@ -228,9 +206,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(*hpdp, addr, pdshift); } -#endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) /* Build list of addresses of gigantic pages. This function is used in early * boot before the buddy allocator is setup. */ @@ -310,7 +287,11 @@ static int __init do_gpage_early_setup(char *param, char *val, npages = 0; if (npages > MAX_NUMBER_GPAGES) { pr_warn("MMU: %lu pages requested for page " +#ifdef CONFIG_PHYS_ADDR_T_64BIT "size %llu KB, limiting to " +#else + "size %u KB, limiting to " +#endif __stringify(MAX_NUMBER_GPAGES) "\n", npages, size / 1024); npages = MAX_NUMBER_GPAGES; @@ -392,7 +373,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) } #endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) @@ -442,6 +423,8 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) } put_cpu_var(hugepd_freelist_cur); } +#else +static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {} #endif static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, @@ -453,13 +436,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif unsigned long pdmask = ~((1UL << pdshift) - 1); unsigned int num_hugepd = 1; + unsigned int shift = hugepd_shift(*hpdp); -#ifdef CONFIG_PPC_FSL_BOOK3E /* Note: On fsl the hpdp may be the first of several */ - num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift)); -#else - unsigned int shift = hugepd_shift(*hpdp); -#endif + if (shift > pdshift) + num_hugepd = 1 << (shift - pdshift); start &= pdmask; if (start < floor) @@ -475,11 +456,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif for (i = 0; i < num_hugepd; i++, hpdp++) hpdp->pd = 0; -#ifdef CONFIG_PPC_FSL_BOOK3E - hugepd_free(tlb, hugepte); -#else - pgtable_free_tlb(tlb, hugepte, pdshift - shift); -#endif + if (shift >= pdshift) + hugepd_free(tlb, hugepte); + else + pgtable_free_tlb(tlb, hugepte, pdshift - shift); } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, @@ -492,6 +472,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, start = addr; do { + unsigned long more; + pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { @@ -502,15 +484,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, WARN_ON(!pmd_none_or_clear_bad(pmd)); continue; } -#ifdef CONFIG_PPC_FSL_BOOK3E /* * Increment next by the size of the huge mapping since * there may be more than one entry at this level for a * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. */ - next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); -#endif + more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); + if (more > next) + next = more; + free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT, addr, next, floor, ceiling); } while (addr = next, addr != end); @@ -550,15 +533,17 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); } else { -#ifdef CONFIG_PPC_FSL_BOOK3E + unsigned long more; /* * Increment next by the size of the huge mapping since * there may be more than one entry at this level for a * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. */ - next = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); -#endif + more = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); + if (more > next) + next = more; + free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT, addr, next, floor, ceiling); } @@ -615,15 +600,17 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } else { -#ifdef CONFIG_PPC_FSL_BOOK3E + unsigned long more; /* * Increment next by the size of the huge mapping since * there may be more than one entry at the pgd level * for a single hugepage, but all of them point to the * same kmem cache that holds the hugepte. */ - next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); -#endif + more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); + if (more > next) + next = more; + free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, addr, next, floor, ceiling); } @@ -753,12 +740,13 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ -#ifdef CONFIG_PPC_FSL_BOOK3E - if ((size < PAGE_SIZE) || !is_power_of_4(size)) + if (size <= PAGE_SIZE) return -EINVAL; -#else - if (!is_power_of_2(size) - || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) +#if defined(CONFIG_PPC_FSL_BOOK3E) + if (!is_power_of_4(size)) + return -EINVAL; +#elif !defined(CONFIG_PPC_8xx) + if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT)) return -EINVAL; #endif @@ -791,53 +779,15 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); -#ifdef CONFIG_PPC_FSL_BOOK3E struct kmem_cache *hugepte_cache; static int __init hugetlbpage_init(void) { int psize; - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - unsigned shift; - - if (!mmu_psize_defs[psize].shift) - continue; - - shift = mmu_psize_to_shift(psize); - - /* Don't treat normal page sizes as huge... */ - if (shift != PAGE_SHIFT) - if (add_huge_page_size(1ULL << shift) < 0) - continue; - } - - /* - * Create a kmem cache for hugeptes. The bottom bits in the pte have - * size information encoded in them, so align them to allow this - */ - hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t), - HUGEPD_SHIFT_MASK + 1, 0, NULL); - if (hugepte_cache == NULL) - panic("%s: Unable to create kmem cache for hugeptes\n", - __func__); - - /* Default hpage size = 4M */ - if (mmu_psize_defs[MMU_PAGE_4M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; - else - panic("%s: Unable to set default huge page size\n", __func__); - - - return 0; -} -#else -static int __init hugetlbpage_init(void) -{ - int psize; - +#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) return -ENODEV; - +#endif for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { unsigned shift; unsigned pdshift; @@ -850,9 +800,9 @@ static int __init hugetlbpage_init(void) if (add_huge_page_size(1ULL << shift) < 0) continue; - if (shift < PMD_SHIFT) + if (shift < HUGEPD_PUD_SHIFT) pdshift = PMD_SHIFT; - else if (shift < PUD_SHIFT) + else if (shift < HUGEPD_PGD_SHIFT) pdshift = PUD_SHIFT; else pdshift = PGDIR_SHIFT; @@ -860,14 +810,38 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. */ - if (pdshift != shift) { + if (pdshift > shift) { pgtable_cache_add(pdshift - shift, NULL); if (!PGT_CACHE(pdshift - shift)) panic("hugetlbpage_init(): could not create " "pgtable cache for %d bit pagesize\n", shift); } +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) + else if (!hugepte_cache) { + /* + * Create a kmem cache for hugeptes. The bottom bits in + * the pte have size information encoded in them, so + * align them to allow this + */ + hugepte_cache = kmem_cache_create("hugepte-cache", + sizeof(pte_t), + HUGEPD_SHIFT_MASK + 1, + 0, NULL); + if (hugepte_cache == NULL) + panic("%s: Unable to create kmem cache " + "for hugeptes\n", __func__); + + } +#endif } +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) + /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */ + if (mmu_psize_defs[MMU_PAGE_4M].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; + else if (mmu_psize_defs[MMU_PAGE_512K].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift; +#else /* Set default large page size. Currently, we pick 16M or 1M * depending on what is available */ @@ -877,11 +851,13 @@ static int __init hugetlbpage_init(void) HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; else if (mmu_psize_defs[MMU_PAGE_2M].shift) HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift; - +#endif + else + panic("%s: Unable to set default huge page size\n", __func__); return 0; } -#endif + arch_initcall(hugetlbpage_init); void flush_dcache_icache_hugepage(struct page *page) |