diff options
author | Miaohe Lin <linmiaohe@huawei.com> | 2022-06-25 17:28:11 +0800 |
---|---|---|
committer | akpm <akpm@linux-foundation.org> | 2022-07-03 18:08:51 -0700 |
commit | 4d928e20fd5b9fd97e1e631500386ef12a2858d7 (patch) | |
tree | 9d01290c76ccf5a0a2742225fcbe370f597f5317 /mm/khugepaged.c | |
parent | dd5ff79d4ab85ac0cdb5f87e8fee4c4725255c4b (diff) |
mm/khugepaged: stop swapping in page when VM_FAULT_RETRY occurs
When do_swap_page returns VM_FAULT_RETRY, we do not retry here and thus
swap entry will remain in pagetable. This will result in later failure.
So stop swapping in pages in this case to save cpu cycles. As A further
optimization, mmap_lock is released when __collapse_huge_page_swapin()
fails to avoid relocking mmap_lock. And "swapped_in++" is moved after
error handling to make it more accurate.
Link: https://lkml.kernel.org/r/20220625092816.4856-3-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: NeilBrown <neilb@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r-- | mm/khugepaged.c | 32 |
1 files changed, 14 insertions, 18 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 34e6b4604aa1..16e98395f362 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -972,8 +972,8 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, * Bring missing pages in from swap, to complete THP collapse. * Only done if khugepaged_scan_pmd believes it is worthwhile. * - * Called and returns without pte mapped or spinlocks held, - * but with mmap_lock held to protect against vma changes. + * Called and returns without pte mapped or spinlocks held. + * Note that if false is returned, mmap_lock will be released. */ static bool __collapse_huge_page_swapin(struct mm_struct *mm, @@ -1000,27 +1000,24 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, pte_unmap(vmf.pte); continue; } - swapped_in++; ret = do_swap_page(&vmf); - /* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */ + /* + * do_swap_page returns VM_FAULT_RETRY with released mmap_lock. + * Note we treat VM_FAULT_RETRY as VM_FAULT_ERROR here because + * we do not retry here and swap entry will remain in pagetable + * resulting in later failure. + */ if (ret & VM_FAULT_RETRY) { - mmap_read_lock(mm); - if (hugepage_vma_revalidate(mm, haddr, &vma)) { - /* vma is no longer available, don't continue to swapin */ - trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); - return false; - } - /* check if the pmd is still valid */ - if (mm_find_pmd(mm, haddr) != pmd) { - trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); - return false; - } + trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); + return false; } if (ret & VM_FAULT_ERROR) { + mmap_read_unlock(mm); trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); return false; } + swapped_in++; } /* Drain LRU add pagevec to remove extra pin on the swapped in pages */ @@ -1086,13 +1083,12 @@ static void collapse_huge_page(struct mm_struct *mm, } /* - * __collapse_huge_page_swapin always returns with mmap_lock locked. - * If it fails, we release mmap_lock and jump out_nolock. + * __collapse_huge_page_swapin will return with mmap_lock released + * when it fails. So we jump out_nolock directly in that case. * Continuing to collapse causes inconsistency. */ if (unmapped && !__collapse_huge_page_swapin(mm, vma, address, pmd, referenced)) { - mmap_read_unlock(mm); goto out_nolock; } |