summary | refs | log | tree | commit | diff
path: root/mm/rmap.c
diff options
context:
space:
mode:
author: Hugh Dickins <hughd@google.com> 2022-02-14 18:20:24 -0800
committer: Matthew Wilcox (Oracle) <willy@infradead.org> 2022-02-17 11:56:13 -0500
commit: ebcbc6ea7d8a604ad8504dae70a6ac1b1e64a0b7 (patch)
tree: 014fb16d25e6e420bbaef89f800f58fd24983747 /mm/rmap.c
parent: f71077a4d84bbe8c7b91b7db7c4ef815755ac5e3 (diff)
mm/munlock: delete page_mlock() and all its works
We have recommended some applications to mlock their userspace, but that turns out to be counter-productive: when many processes mlock the same file, contention on rmap's i_mmap_rwsem can become intolerable at exit: it is needed for write, to remove any vma mapping that file from rmap's tree; but hogged for read by those with mlocks calling page_mlock() (formerly known as try_to_munlock()) on *each* page mapped from the file (the purpose being to find out whether another process has the page mlocked, so therefore it should not be unmlocked yet).

Several optimizations have been made in the past: one is to skip page_mlock() when mapcount tells that nothing else has this page mapped; but that doesn't help at all when others do have it mapped.

This time around, I initially intended to add a preliminary search of the rmap tree for overlapping VM_LOCKED ranges; but that gets messy with locking order, when in doubt whether a page is actually present; and risks adding even more contention on the i_mmap_rwsem.

A solution would be much easier, if only there were space in struct page for an mlock_count... but actually, most of the time, there is space for it - an mlocked page spends most of its life on an unevictable LRU, but since 3.18 removed the scan_unevictable_pages sysctl, that "LRU" has been redundant. Let's try to reuse its page->lru.

But leave that until a later patch: in this patch, clear the ground by removing page_mlock(), and all the infrastructure that has gathered around it - which mostly hinders understanding, and will make reviewing new additions harder. Don't mind those old comments about THPs, they date from before 4.5's refcounting rework: splitting is not a risk here.

Just keep a minimal version of munlock_vma_page(), as reminder of what it should attend to (in particular, the odd way PGSTRANDED is counted out of PGMUNLOCKED), and likewise a stub for munlock_vma_pages_range().
Move unchanged __mlock_posix_error_return() out of the way, down to above its caller: this series then makes no further change after mlock_fixup().

After this and each following commit, the kernel builds, boots and runs; but with deficiencies which may show up in testing of mlock and munlock. The system calls succeed or fail as before, and mlock remains effective in preventing page reclaim; but meminfo's Unevictable and Mlocked amounts may be shown too low after mlock, grow, then stay too high after munlock: with previously mlocked pages remaining unevictable for too long, until finally unmapped and freed and counts corrected. Normal service will be resumed in "mm/munlock: mlock_pte_range() when mlocking or munlocking".

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- mm/rmap.c 80
1 files changed, 0 insertions, 80 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 6a1e8c7f6213..7ce7f1946cff 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1996,76 +1996,6 @@ void try_to_migrate(struct page *page, enum ttu_flags flags)
rmap_walk(page, &rwc);
}
-/*
- * Walks the vma's mapping a page and mlocks the page if any locked vma's are
- * found. Once one is found the page is locked and the scan can be terminated.
- */
-static bool page_mlock_one(struct page *page, struct vm_area_struct *vma,
- unsigned long address, void *unused)
-{
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = address,
- };
-
- /* An un-locked vma doesn't have any pages to lock, continue the scan */
- if (!(vma->vm_flags & VM_LOCKED))
- return true;
-
- while (page_vma_mapped_walk(&pvmw)) {
- /*
- * Need to recheck under the ptl to serialise with
- * __munlock_pagevec_fill() after VM_LOCKED is cleared in
- * munlock_vma_pages_range().
- */
- if (vma->vm_flags & VM_LOCKED) {
- /*
- * PTE-mapped THP are never marked as mlocked; but
- * this function is never called on a DoubleMap THP,
- * nor on an Anon THP (which may still be PTE-mapped
- * after DoubleMap was cleared).
- */
- mlock_vma_page(page);
- /*
- * No need to scan further once the page is marked
- * as mlocked.
- */
- page_vma_mapped_walk_done(&pvmw);
- return false;
- }
- }
-
- return true;
-}
-
-/**
- * page_mlock - try to mlock a page
- * @page: the page to be mlocked
- *
- * Called from munlock code. Checks all of the VMAs mapping the page and mlocks
- * the page if any are found. The page will be returned with PG_mlocked cleared
- * if it is not mapped by any locked vmas.
- */
-void page_mlock(struct page *page)
-{
- struct rmap_walk_control rwc = {
- .rmap_one = page_mlock_one,
- .done = page_not_mapped,
- .anon_lock = page_lock_anon_vma_read,
-
- };
-
- VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
- VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
-
- /* Anon THP are only marked as mlocked when singly mapped */
- if (PageTransCompound(page) && PageAnon(page))
- return;
-
- rmap_walk(page, &rwc);
-}
-
#ifdef CONFIG_DEVICE_PRIVATE
struct make_exclusive_args {
struct mm_struct *mm;
@@ -2291,11 +2221,6 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
*
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the anon_vma struct it points to.
- *
- * When called from page_mlock(), the mmap_lock of the mm containing the vma
- * where the page was found will be held for write. So, we won't recheck
- * vm_flags for that VMA. That should be OK, because that vma shouldn't be
- * LOCKED.
*/
static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
bool locked)
@@ -2344,11 +2269,6 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
*
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the address_space struct it points to.
- *
- * When called from page_mlock(), the mmap_lock of the mm containing the vma
- * where the page was found will be held for write. So, we won't recheck
- * vm_flags for that VMA. That should be OK, because that vma shouldn't be
- * LOCKED.
*/
static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
bool locked)