From c88033efe9a391e72ba6b5df4b01d6e628f4e734 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 22 Apr 2024 09:33:11 -0400 Subject: mm/userfaultfd: reset ptes when close() for wr-protected ones Userfaultfd unregister includes a step to remove wr-protect bits from all the relevant pgtable entries, but that only covered an explicit UFFDIO_UNREGISTER ioctl, not a close() on the userfaultfd itself. Cover that too. This fixes a WARN trace. The only user-visible side effect is the user can observe leftover wr-protect bits even if the user close()ed on a userfaultfd when releasing the last reference to it. However hopefully that should be harmless, and nothing bad should happen even if so. This change is now more important after the recent page-table-check patch we merged in mm-unstable (446dd9ad37d0 ("mm/page_table_check: support userfault wr-protect entries")), as we'll do sanity checks on uffd-wp bits without vma context. So it's better if we can 100% guarantee no uffd-wp bit leftovers, to make sure each report will be valid. 
Link: https://lore.kernel.org/all/000000000000ca4df20616a0fe16@google.com/ Fixes: f369b07c8614 ("mm/uffd: reset write protection when unregister with wp-mode") Analyzed-by: David Hildenbrand Link: https://lkml.kernel.org/r/20240422133311.2987675-1-peterx@redhat.com Reported-by: syzbot+d8426b591c36b21c750e@syzkaller.appspotmail.com Signed-off-by: Peter Xu Reviewed-by: David Hildenbrand Cc: Nadav Amit Cc: Signed-off-by: Andrew Morton --- fs/userfaultfd.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 60dcfafdc11a..292f5fd50104 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -895,6 +895,10 @@ static int userfaultfd_release(struct inode *inode, struct file *file) prev = vma; continue; } + /* Reset ptes for the whole vma range if wr-protected */ + if (userfaultfd_wp(vma)) + uffd_wp_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, false); new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; vma = vma_modify_flags_uffd(&vmi, prev, vma, vma->vm_start, vma->vm_end, new_flags, -- cgit v1.2.3 From c70dce4982ce1718bf978a35f8e26160b82081f4 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 29 Apr 2024 12:40:17 +0100 Subject: fs/proc/task_mmu: fix loss of young/dirty bits during pagemap scan make_uffd_wp_pte() was previously doing: pte = ptep_get(ptep); ptep_modify_prot_start(ptep); pte = pte_mkuffd_wp(pte); ptep_modify_prot_commit(ptep, pte); But if another thread accessed or dirtied the pte between the first 2 calls, this could lead to loss of that information. Since ptep_modify_prot_start() gets and clears atomically, the following is the correct pattern and prevents any possible race. 
Any access after the first call would see an invalid pte and cause a fault: pte = ptep_modify_prot_start(ptep); pte = pte_mkuffd_wp(pte); ptep_modify_prot_commit(ptep, pte); Link: https://lkml.kernel.org/r/20240429114017.182570-1-ryan.roberts@arm.com Fixes: 52526ca7fdb9 ("fs/proc/task_mmu: implement IOCTL to get and optionally clear info about PTEs") Signed-off-by: Ryan Roberts Acked-by: David Hildenbrand Cc: Muhammad Usama Anjum Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 23fbab954c20..af4bc1da0c01 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1825,7 +1825,7 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma, pte_t old_pte; old_pte = ptep_modify_prot_start(vma, addr, pte); - ptent = pte_mkuffd_wp(ptent); + ptent = pte_mkuffd_wp(old_pte); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); } else if (is_swap_pte(ptent)) { ptent = pte_swp_mkuffd_wp(ptent); -- cgit v1.2.3 From 2c7ad9a590d1a99ec59c7d90cef41e2b296944c4 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 29 Apr 2024 12:41:04 +0100 Subject: fs/proc/task_mmu: fix uffd-wp confusion in pagemap_scan_pmd_entry() pagemap_scan_pmd_entry() checks if uffd-wp is set on each pte to avoid unnecessary work if it is already set. However it was previously checking with `pte_uffd_wp(ptep_get(pte))` without first confirming that the pte was present. It is only valid to call pte_uffd_wp() for present ptes. For swap ptes, pte_swp_uffd_wp() must be called because the uffd-wp bit may be kept in a different position, depending on the arch. This was leading to test failures in the pagemap_ioctl mm selftest, when bringing up uffd-wp support on arm64 due to incorrectly interpreting the uffd-wp status of migration entries. Let's fix this by using the correct check based on pte_present(). 
While we are at it, let's pass the pte to make_uffd_wp_pte() to avoid the pointless extra ptep_get() which can't be optimized out due to READ_ONCE() on many arches. Link: https://lkml.kernel.org/r/20240429114104.182890-1-ryan.roberts@arm.com Fixes: 12f6b01a0bcb ("fs/proc/task_mmu: add fast paths to get/clear PAGE_IS_WRITTEN flag") Closes: https://lore.kernel.org/linux-arm-kernel/ZiuyGXt0XWwRgFh9@x1n/ Signed-off-by: Ryan Roberts Acked-by: David Hildenbrand Reviewed-by: Muhammad Usama Anjum Tested-by: Muhammad Usama Anjum Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index af4bc1da0c01..102f48668c35 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1817,10 +1817,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, } static void make_uffd_wp_pte(struct vm_area_struct *vma, - unsigned long addr, pte_t *pte) + unsigned long addr, pte_t *pte, pte_t ptent) { - pte_t ptent = ptep_get(pte); - if (pte_present(ptent)) { pte_t old_pte; @@ -2175,9 +2173,12 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) { /* Fast path for performing exclusive WP */ for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { - if (pte_uffd_wp(ptep_get(pte))) + pte_t ptent = ptep_get(pte); + + if ((pte_present(ptent) && pte_uffd_wp(ptent)) || + pte_swp_uffd_wp_any(ptent)) continue; - make_uffd_wp_pte(vma, addr, pte); + make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = addr + PAGE_SIZE; @@ -2190,8 +2191,10 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, p->arg.return_mask == PAGE_IS_WRITTEN) { for (addr = start; addr < end; pte++, addr += PAGE_SIZE) { unsigned long next = addr + PAGE_SIZE; + pte_t ptent = ptep_get(pte); - if (pte_uffd_wp(ptep_get(pte))) + if 
((pte_present(ptent) && pte_uffd_wp(ptent)) || + pte_swp_uffd_wp_any(ptent)) continue; ret = pagemap_scan_output(p->cur_vma_category | PAGE_IS_WRITTEN, p, addr, &next); @@ -2199,7 +2202,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, break; if (~p->arg.flags & PM_SCAN_WP_MATCHING) continue; - make_uffd_wp_pte(vma, addr, pte); + make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = next; @@ -2208,8 +2211,9 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, } for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { + pte_t ptent = ptep_get(pte); unsigned long categories = p->cur_vma_category | - pagemap_page_category(p, vma, addr, ptep_get(pte)); + pagemap_page_category(p, vma, addr, ptent); unsigned long next = addr + PAGE_SIZE; if (!pagemap_scan_is_interesting_page(categories, p)) @@ -2224,7 +2228,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, if (~categories & PAGE_IS_WRITTEN) continue; - make_uffd_wp_pte(vma, addr, pte); + make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = next; -- cgit v1.2.3