diff options
author | Hugh Dickins <hughd@google.com> | 2023-06-08 18:34:03 -0700 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2023-06-19 16:19:16 -0700 |
commit | f3cd4ab0aabf0c7f25ad438b37954db970174731 (patch) | |
tree | 434e218bfa58c4765a3f96c8644ba9a9085ca4c3 /mm/madvise.c | |
parent | a5be621ee2925b6ee2db455c45c2af2d8a195b0c (diff) |
mm/madvise: clean up pte_offset_map_lock() scans
Came here to make madvise's several pte_offset_map_lock() scans advance to
next extent on failure, and remove superfluous pmd_trans_unstable() and
pmd_none_or_trans_huge_or_clear_bad() calls. But also did some nearby
cleanup.
swapin_walk_pmd_entry(): don't name an address "index"; don't drop the
lock after every pte, only when calling out to read_swap_cache_async().
madvise_cold_or_pageout_pte_range() and madvise_free_pte_range(): prefer
"start_pte" for pointer, orig_pte usually denotes a saved pte value; leave
lazy MMU mode before unlocking; merge the success and failure paths after
split_folio().
Link: https://lkml.kernel.org/r/cc4d9a88-9da6-362-50d9-6735c2b125c6@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Song Liu <song@kernel.org>
Cc: Steven Price <steven.price@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zack Rusin <zackr@vmware.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/madvise.c')
-rw-r--r-- | mm/madvise.c | 122 |
1 files changed, 68 insertions, 54 deletions
diff --git a/mm/madvise.c b/mm/madvise.c index b5ffbaf616f5..0af64c4a8f82 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -188,37 +188,43 @@ success: #ifdef CONFIG_SWAP static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, - unsigned long end, struct mm_walk *walk) + unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->private; - unsigned long index; struct swap_iocb *splug = NULL; + pte_t *ptep = NULL; + spinlock_t *ptl; + unsigned long addr; - if (pmd_none_or_trans_huge_or_clear_bad(pmd)) - return 0; - - for (index = start; index != end; index += PAGE_SIZE) { + for (addr = start; addr < end; addr += PAGE_SIZE) { pte_t pte; swp_entry_t entry; struct page *page; - spinlock_t *ptl; - pte_t *ptep; - ptep = pte_offset_map_lock(vma->vm_mm, pmd, index, &ptl); - pte = *ptep; - pte_unmap_unlock(ptep, ptl); + if (!ptep++) { + ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + if (!ptep) + break; + } + pte = *ptep; if (!is_swap_pte(pte)) continue; entry = pte_to_swp_entry(pte); if (unlikely(non_swap_entry(entry))) continue; + pte_unmap_unlock(ptep, ptl); + ptep = NULL; + page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, - vma, index, false, &splug); + vma, addr, false, &splug); if (page) put_page(page); } + + if (ptep) + pte_unmap_unlock(ptep, ptl); swap_read_unplug(splug); cond_resched(); @@ -340,7 +346,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, bool pageout = private->pageout; struct mm_struct *mm = tlb->mm; struct vm_area_struct *vma = walk->vma; - pte_t *orig_pte, *pte, ptent; + pte_t *start_pte, *pte, ptent; spinlock_t *ptl; struct folio *folio = NULL; LIST_HEAD(folio_list); @@ -422,11 +428,11 @@ huge_unlock: } regular_folio: - if (pmd_trans_unstable(pmd)) - return 0; #endif tlb_change_page_size(tlb, PAGE_SIZE); - orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + if (!start_pte) + return 0; flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); for (; addr < end; pte++, addr += PAGE_SIZE) { @@ -447,25 +453,28 @@ regular_folio: * are sure it's worth. Split it if we are only owner. */ if (folio_test_large(folio)) { + int err; + if (folio_mapcount(folio) != 1) break; if (pageout_anon_only_filter && !folio_test_anon(folio)) break; - folio_get(folio); - if (!folio_trylock(folio)) { - folio_put(folio); - break; - } - pte_unmap_unlock(orig_pte, ptl); - if (split_folio(folio)) { - folio_unlock(folio); - folio_put(folio); - orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!folio_trylock(folio)) break; - } + folio_get(folio); + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(start_pte, ptl); + start_pte = NULL; + err = split_folio(folio); folio_unlock(folio); folio_put(folio); - orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + if (err) + break; + start_pte = pte = + pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!start_pte) + break; + arch_enter_lazy_mmu_mode(); pte--; addr -= PAGE_SIZE; continue; @@ -510,8 +519,10 @@ regular_folio: folio_deactivate(folio); } - arch_leave_lazy_mmu_mode(); - pte_unmap_unlock(orig_pte, ptl); + if (start_pte) { + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(start_pte, ptl); + } if (pageout) reclaim_pages(&folio_list); cond_resched(); @@ -612,7 +623,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, struct mm_struct *mm = tlb->mm; struct vm_area_struct *vma = walk->vma; spinlock_t *ptl; - pte_t *orig_pte, *pte, ptent; + pte_t *start_pte, *pte, ptent; struct folio *folio; int nr_swap = 0; unsigned long next; @@ -620,13 +631,12 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, next = pmd_addr_end(addr, end); if (pmd_trans_huge(*pmd)) if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next)) - goto next; - - if (pmd_trans_unstable(pmd)) - return 0; + return 0; tlb_change_page_size(tlb, PAGE_SIZE); - orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!start_pte) + return 0; flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); for (; addr != end; pte++, addr += PAGE_SIZE) { @@ -664,23 +674,26 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, * deactivate all pages. */ if (folio_test_large(folio)) { + int err; + if (folio_mapcount(folio) != 1) - goto out; + break; + if (!folio_trylock(folio)) + break; folio_get(folio); - if (!folio_trylock(folio)) { - folio_put(folio); - goto out; - } - pte_unmap_unlock(orig_pte, ptl); - if (split_folio(folio)) { - folio_unlock(folio); - folio_put(folio); - orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); - goto out; - } + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(start_pte, ptl); + start_pte = NULL; + err = split_folio(folio); folio_unlock(folio); folio_put(folio); - orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + if (err) + break; + start_pte = pte = + pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!start_pte) + break; + arch_enter_lazy_mmu_mode(); pte--; addr -= PAGE_SIZE; continue; @@ -725,17 +738,18 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, } folio_mark_lazyfree(folio); } -out: + if (nr_swap) { if (current->mm == mm) sync_mm_rss(mm); - add_mm_counter(mm, MM_SWAPENTS, nr_swap); } - arch_leave_lazy_mmu_mode(); - pte_unmap_unlock(orig_pte, ptl); + if (start_pte) { + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(start_pte, ptl); + } cond_resched(); -next: + return 0; } |