diff options
author | Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | 2016-07-26 15:25:29 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-26 16:19:19 -0700 |
commit | 1010245964415bb7403463115bab2cd26244b445 (patch) | |
tree | 0bb07c499e1816334dd405ea9f82381223c7479a /mm | |
parent | dd78fedde4b99b322f2dc849d467d365a82e23ca (diff) |
mm: introduce do_set_pmd()
With postponed page table allocation we have chance to setup huge pages.
do_set_pte() calls do_set_pmd() if following criteria met:
- page is compound;
- pmd entry in pmd_none();
- vma has suitable size and alignment;
Link: http://lkml.kernel.org/r/1466021202-61880-12-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 5 | ||||
-rw-r--r-- | mm/memory.c | 72 | ||||
-rw-r--r-- | mm/migrate.c | 3 |
3 files changed, 72 insertions, 8 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 90f5dd22b1c8..5bc058ad12c2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -796,11 +796,6 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) return pmd; } -static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot) -{ - return pmd_mkhuge(mk_pmd(page, prot)); -} - static inline struct list_head *page_deferred_list(struct page *page) { /* diff --git a/mm/memory.c b/mm/memory.c index 30cda24ff205..650622a3a0a1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2920,6 +2920,66 @@ map_pte: return 0; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1) +static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, + unsigned long haddr) +{ + if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) != + (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK)) + return false; + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + return false; + return true; +} + +static int do_set_pmd(struct fault_env *fe, struct page *page) +{ + struct vm_area_struct *vma = fe->vma; + bool write = fe->flags & FAULT_FLAG_WRITE; + unsigned long haddr = fe->address & HPAGE_PMD_MASK; + pmd_t entry; + int i, ret; + + if (!transhuge_vma_suitable(vma, haddr)) + return VM_FAULT_FALLBACK; + + ret = VM_FAULT_FALLBACK; + page = compound_head(page); + + fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); + if (unlikely(!pmd_none(*fe->pmd))) + goto out; + + for (i = 0; i < HPAGE_PMD_NR; i++) + flush_icache_page(vma, page + i); + + entry = mk_huge_pmd(page, vma->vm_page_prot); + if (write) + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + + add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); + page_add_file_rmap(page, true); + + set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry); + + update_mmu_cache_pmd(vma, haddr, fe->pmd); + + /* fault is handled */ + ret = 0; +out: + spin_unlock(fe->ptl); + return ret; +} +#else +static int do_set_pmd(struct fault_env *fe, struct page *page) +{ + BUILD_BUG(); + return 0; +} +#endif + /** * alloc_set_pte - setup new PTE entry for given page and add reverse page * mapping. If needed, the fucntion allocates page table or use pre-allocated. @@ -2939,9 +2999,19 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, struct vm_area_struct *vma = fe->vma; bool write = fe->flags & FAULT_FLAG_WRITE; pte_t entry; + int ret; + + if (pmd_none(*fe->pmd) && PageTransCompound(page)) { + /* THP on COW? */ + VM_BUG_ON_PAGE(memcg, page); + + ret = do_set_pmd(fe, page); + if (ret != VM_FAULT_FALLBACK) + return ret; + } if (!fe->pte) { - int ret = pte_alloc_one_map(fe); + ret = pte_alloc_one_map(fe); if (ret) return ret; } diff --git a/mm/migrate.c b/mm/migrate.c index e85a72c0d6f0..2232f6923cc7 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1986,8 +1986,7 @@ fail_putback: } orig_entry = *pmd; - entry = mk_pmd(new_page, vma->vm_page_prot); - entry = pmd_mkhuge(entry); + entry = mk_huge_pmd(new_page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); /* |