summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/hugetlbpage.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-16 09:26:42 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-16 09:26:42 -0800
commitde399813b521ea7e38bbfb5e5b620b5e202e5783 (patch)
treeceb8302f9d6a7a4f2e25b64c5dc42c1fb80b435b /arch/powerpc/mm/hugetlbpage.c
parent57ca04ab440168e101da746ef9edd1ec583b7214 (diff)
parentc6f6634721c871bfab4235e1cbcad208d3063798 (diff)
Merge tag 'powerpc-4.10-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: "Highlights include: - Support for the kexec_file_load() syscall, which is a prereq for secure and trusted boot. - Prevent kernel execution of userspace on P9 Radix (similar to SMEP/PXN). - Sort the exception tables at build time, to save time at boot, and store them as relative offsets to save space in the kernel image & memory. - Allow building the kernel with thin archives, which should allow us to build an allyesconfig once some other fixes land. - Build fixes to allow us to correctly rebuild when changing the kernel endian from big to little or vice versa. - Plumbing so that we can avoid doing a full mm TLB flush on P9 Radix. - Initial stack protector support (-fstack-protector). - Support for dumping the radix (aka. Linux) and hash page tables via debugfs. - Fix an oops in cxl coredump generation when cxl_get_fd() is used. - Freescale updates from Scott: "Highlights include 8xx hugepage support, qbman fixes/cleanup, device tree updates, and some misc cleanup." - Many and varied fixes and minor enhancements as always. Thanks to: Alexey Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Anshuman Khandual, Anton Blanchard, Balbir Singh, Bartlomiej Zolnierkiewicz, Christophe Jaillet, Christophe Leroy, Denis Kirjanov, Elimar Riesebieter, Frederic Barrat, Gautham R. Shenoy, Geliang Tang, Geoff Levand, Jack Miller, Johan Hovold, Lars-Peter Clausen, Libin, Madhavan Srinivasan, Michael Neuling, Nathan Fontenot, Naveen N. Rao, Nicholas Piggin, Pan Xinhui, Peter Senna Tschudin, Rashmica Gupta, Rui Teng, Russell Currey, Scott Wood, Simon Guo, Suraj Jitindar Singh, Thiago Jung Bauermann, Tobias Klauser, Vaibhav Jain" [ And thanks to Michael, who took time off from a new baby to get this pull request done. - Linus ] * tag 'powerpc-4.10-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (174 commits) powerpc/fsl/dts: add FMan node for t1042d4rdb powerpc/fsl/dts: add sg_2500_aqr105_phy4 alias on t1024rdb powerpc/fsl/dts: add QMan and BMan nodes on t1024 powerpc/fsl/dts: add QMan and BMan nodes on t1023 soc/fsl/qman: test: use DEFINE_SPINLOCK() powerpc/fsl-lbc: use DEFINE_SPINLOCK() powerpc/8xx: Implement support of hugepages powerpc: get hugetlbpage handling more generic powerpc: port 64 bits pgtable_cache to 32 bits powerpc/boot: Request no dynamic linker for boot wrapper soc/fsl/bman: Use resource_size instead of computation soc/fsl/qe: use builtin_platform_driver powerpc/fsl_pmc: use builtin_platform_driver powerpc/83xx/suspend: use builtin_platform_driver powerpc/ftrace: Fix the comments for ftrace_modify_code powerpc/perf: macros for power9 format encoding powerpc/perf: power9 raw event format encoding powerpc/perf: update attribute_group data structure powerpc/perf: factor out the event format field powerpc/mm/iommu, vfio/spapr: Put pages on VFIO container shutdown ...
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r--arch/powerpc/mm/hugetlbpage.c216
1 files changed, 96 insertions, 120 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a5d3ecdabc44..289df38fb7e0 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -26,6 +26,8 @@
#ifdef CONFIG_HUGETLB_PAGE
#define PAGE_SHIFT_64K 16
+#define PAGE_SHIFT_512K 19
+#define PAGE_SHIFT_8M 23
#define PAGE_SHIFT_16M 24
#define PAGE_SHIFT_16G 34
@@ -38,7 +40,7 @@ unsigned int HPAGE_SHIFT;
* implementations may have more than one gpage size, so we need multiple
* arrays
*/
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define MAX_NUMBER_GPAGES 128
struct psize_gpages {
u64 gpage_list[MAX_NUMBER_GPAGES];
@@ -64,14 +66,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
{
struct kmem_cache *cachep;
pte_t *new;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
int i;
- int num_hugepd = 1 << (pshift - pdshift);
- cachep = hugepte_cache;
-#else
- cachep = PGT_CACHE(pdshift - pshift);
-#endif
+ int num_hugepd;
+
+ if (pshift >= pdshift) {
+ cachep = hugepte_cache;
+ num_hugepd = 1 << (pshift - pdshift);
+ } else {
+ cachep = PGT_CACHE(pdshift - pshift);
+ num_hugepd = 1;
+ }
new = kmem_cache_zalloc(cachep, GFP_KERNEL);
@@ -89,7 +93,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
smp_wmb();
spin_lock(&mm->page_table_lock);
-#ifdef CONFIG_PPC_FSL_BOOK3E
+
/*
* We have multiple higher-level entries that point to the same
* actual pte location. Fill in each as we go and backtrack on error.
@@ -100,8 +104,18 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (unlikely(!hugepd_none(*hpdp)))
break;
else
+#ifdef CONFIG_PPC_BOOK3S_64
+ hpdp->pd = __pa(new) |
+ (shift_to_mmu_psize(pshift) << 2);
+#elif defined(CONFIG_PPC_8xx)
+ hpdp->pd = __pa(new) |
+ (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
+ _PMD_PAGE_512K) |
+ _PMD_PRESENT;
+#else
/* We use the old format for PPC_FSL_BOOK3E */
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+#endif
}
/* If we bailed from the for loop early, an error occurred, clean up */
if (i < num_hugepd) {
@@ -109,17 +123,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
hpdp->pd = 0;
kmem_cache_free(cachep, new);
}
-#else
- if (!hugepd_none(*hpdp))
- kmem_cache_free(cachep, new);
- else {
-#ifdef CONFIG_PPC_BOOK3S_64
- hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2);
-#else
- hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
-#endif
- }
-#endif
spin_unlock(&mm->page_table_lock);
return 0;
}
@@ -128,7 +131,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
* These macros define how to determine which level of the page table holds
* the hpdp.
*/
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
#define HUGEPD_PUD_SHIFT PUD_SHIFT
#else
@@ -136,7 +139,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
/*
* At this point we do the placement change only for BOOK3S 64. This would
* possibly work on other subarchs.
@@ -153,6 +155,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
addr &= ~(sz-1);
pg = pgd_offset(mm, addr);
+#ifdef CONFIG_PPC_BOOK3S_64
if (pshift == PGDIR_SHIFT)
/* 16GB huge page */
return (pte_t *) pg;
@@ -178,32 +181,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
hpdp = (hugepd_t *)pm;
}
}
- if (!hpdp)
- return NULL;
-
- BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
-
- if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
- return NULL;
-
- return hugepte_offset(*hpdp, addr, pdshift);
-}
-
#else
-
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
-{
- pgd_t *pg;
- pud_t *pu;
- pmd_t *pm;
- hugepd_t *hpdp = NULL;
- unsigned pshift = __ffs(sz);
- unsigned pdshift = PGDIR_SHIFT;
-
- addr &= ~(sz-1);
-
- pg = pgd_offset(mm, addr);
-
if (pshift >= HUGEPD_PGD_SHIFT) {
hpdp = (hugepd_t *)pg;
} else {
@@ -217,7 +195,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
hpdp = (hugepd_t *)pm;
}
}
-
+#endif
if (!hpdp)
return NULL;
@@ -228,9 +206,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
return hugepte_offset(*hpdp, addr, pdshift);
}
-#endif
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
/* Build list of addresses of gigantic pages. This function is used in early
* boot before the buddy allocator is setup.
*/
@@ -310,7 +287,11 @@ static int __init do_gpage_early_setup(char *param, char *val,
npages = 0;
if (npages > MAX_NUMBER_GPAGES) {
pr_warn("MMU: %lu pages requested for page "
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
"size %llu KB, limiting to "
+#else
+ "size %u KB, limiting to "
+#endif
__stringify(MAX_NUMBER_GPAGES) "\n",
npages, size / 1024);
npages = MAX_NUMBER_GPAGES;
@@ -392,7 +373,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
}
#endif
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define HUGEPD_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
@@ -442,6 +423,8 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
}
put_cpu_var(hugepd_freelist_cur);
}
+#else
+static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
#endif
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
@@ -453,13 +436,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
unsigned long pdmask = ~((1UL << pdshift) - 1);
unsigned int num_hugepd = 1;
+ unsigned int shift = hugepd_shift(*hpdp);
-#ifdef CONFIG_PPC_FSL_BOOK3E
/* Note: On fsl the hpdp may be the first of several */
- num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
-#else
- unsigned int shift = hugepd_shift(*hpdp);
-#endif
+ if (shift > pdshift)
+ num_hugepd = 1 << (shift - pdshift);
start &= pdmask;
if (start < floor)
@@ -475,11 +456,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
for (i = 0; i < num_hugepd; i++, hpdp++)
hpdp->pd = 0;
-#ifdef CONFIG_PPC_FSL_BOOK3E
- hugepd_free(tlb, hugepte);
-#else
- pgtable_free_tlb(tlb, hugepte, pdshift - shift);
-#endif
+ if (shift >= pdshift)
+ hugepd_free(tlb, hugepte);
+ else
+ pgtable_free_tlb(tlb, hugepte, pdshift - shift);
}
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -492,6 +472,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
start = addr;
do {
+ unsigned long more;
+
pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end);
if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
@@ -502,15 +484,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
WARN_ON(!pmd_none_or_clear_bad(pmd));
continue;
}
-#ifdef CONFIG_PPC_FSL_BOOK3E
/*
* Increment next by the size of the huge mapping since
* there may be more than one entry at this level for a
* single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
*/
- next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
-#endif
+ more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
+ if (more > next)
+ next = more;
+
free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
addr, next, floor, ceiling);
} while (addr = next, addr != end);
@@ -550,15 +533,17 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
ceiling);
} else {
-#ifdef CONFIG_PPC_FSL_BOOK3E
+ unsigned long more;
/*
* Increment next by the size of the huge mapping since
* there may be more than one entry at this level for a
* single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
*/
- next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
-#endif
+ more = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
+ if (more > next)
+ next = more;
+
free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
addr, next, floor, ceiling);
}
@@ -615,15 +600,17 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
} else {
-#ifdef CONFIG_PPC_FSL_BOOK3E
+ unsigned long more;
/*
* Increment next by the size of the huge mapping since
* there may be more than one entry at the pgd level
* for a single hugepage, but all of them point to the
* same kmem cache that holds the hugepte.
*/
- next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
-#endif
+ more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
+ if (more > next)
+ next = more;
+
free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
addr, next, floor, ceiling);
}
@@ -753,12 +740,13 @@ static int __init add_huge_page_size(unsigned long long size)
/* Check that it is a page size supported by the hardware and
* that it fits within pagetable and slice limits. */
-#ifdef CONFIG_PPC_FSL_BOOK3E
- if ((size < PAGE_SIZE) || !is_power_of_4(size))
+ if (size <= PAGE_SIZE)
return -EINVAL;
-#else
- if (!is_power_of_2(size)
- || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
+#if defined(CONFIG_PPC_FSL_BOOK3E)
+ if (!is_power_of_4(size))
+ return -EINVAL;
+#elif !defined(CONFIG_PPC_8xx)
+ if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT))
return -EINVAL;
#endif
@@ -791,53 +779,15 @@ static int __init hugepage_setup_sz(char *str)
}
__setup("hugepagesz=", hugepage_setup_sz);
-#ifdef CONFIG_PPC_FSL_BOOK3E
struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{
int psize;
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- unsigned shift;
-
- if (!mmu_psize_defs[psize].shift)
- continue;
-
- shift = mmu_psize_to_shift(psize);
-
- /* Don't treat normal page sizes as huge... */
- if (shift != PAGE_SHIFT)
- if (add_huge_page_size(1ULL << shift) < 0)
- continue;
- }
-
- /*
- * Create a kmem cache for hugeptes. The bottom bits in the pte have
- * size information encoded in them, so align them to allow this
- */
- hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
- HUGEPD_SHIFT_MASK + 1, 0, NULL);
- if (hugepte_cache == NULL)
- panic("%s: Unable to create kmem cache for hugeptes\n",
- __func__);
-
- /* Default hpage size = 4M */
- if (mmu_psize_defs[MMU_PAGE_4M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
- else
- panic("%s: Unable to set default huge page size\n", __func__);
-
-
- return 0;
-}
-#else
-static int __init hugetlbpage_init(void)
-{
- int psize;
-
+#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx)
if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
return -ENODEV;
-
+#endif
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
unsigned shift;
unsigned pdshift;
@@ -850,9 +800,9 @@ static int __init hugetlbpage_init(void)
if (add_huge_page_size(1ULL << shift) < 0)
continue;
- if (shift < PMD_SHIFT)
+ if (shift < HUGEPD_PUD_SHIFT)
pdshift = PMD_SHIFT;
- else if (shift < PUD_SHIFT)
+ else if (shift < HUGEPD_PGD_SHIFT)
pdshift = PUD_SHIFT;
else
pdshift = PGDIR_SHIFT;
@@ -860,14 +810,38 @@ static int __init hugetlbpage_init(void)
* if we have pdshift and shift value same, we don't
* use pgt cache for hugepd.
*/
- if (pdshift != shift) {
+ if (pdshift > shift) {
pgtable_cache_add(pdshift - shift, NULL);
if (!PGT_CACHE(pdshift - shift))
panic("hugetlbpage_init(): could not create "
"pgtable cache for %d bit pagesize\n", shift);
}
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
+ else if (!hugepte_cache) {
+ /*
+ * Create a kmem cache for hugeptes. The bottom bits in
+ * the pte have size information encoded in them, so
+ * align them to allow this
+ */
+ hugepte_cache = kmem_cache_create("hugepte-cache",
+ sizeof(pte_t),
+ HUGEPD_SHIFT_MASK + 1,
+ 0, NULL);
+ if (hugepte_cache == NULL)
+ panic("%s: Unable to create kmem cache "
+ "for hugeptes\n", __func__);
+
+ }
+#endif
}
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
+ /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */
+ if (mmu_psize_defs[MMU_PAGE_4M].shift)
+ HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
+ else if (mmu_psize_defs[MMU_PAGE_512K].shift)
+ HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift;
+#else
/* Set default large page size. Currently, we pick 16M or 1M
* depending on what is available
*/
@@ -877,11 +851,13 @@ static int __init hugetlbpage_init(void)
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
else if (mmu_psize_defs[MMU_PAGE_2M].shift)
HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
-
+#endif
+ else
+ panic("%s: Unable to set default huge page size\n", __func__);
return 0;
}
-#endif
+
arch_initcall(hugetlbpage_init);
void flush_dcache_icache_hugepage(struct page *page)