diff options
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 152 |
1 files changed, 89 insertions, 63 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index b1092876e537..7160c1556f79 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -193,7 +193,7 @@ void putback_movable_pages(struct list_head *l) put_page(page); } else { mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_cache(page), -hpage_nr_pages(page)); + page_is_file_lru(page), -hpage_nr_pages(page)); putback_lru_page(page); } } @@ -243,11 +243,15 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, entry = pte_to_swp_entry(*pvmw.pte); if (is_write_migration_entry(entry)) pte = maybe_mkwrite(pte, vma); + else if (pte_swp_uffd_wp(*pvmw.pte)) + pte = pte_mkuffd_wp(pte); if (unlikely(is_zone_device_page(new))) { if (is_device_private_page(new)) { entry = make_device_private_entry(new, pte_write(pte)); pte = swp_entry_to_pte(entry); + if (pte_swp_uffd_wp(*pvmw.pte)) + pte = pte_mkuffd_wp(pte); } } @@ -647,6 +651,14 @@ void migrate_page_states(struct page *newpage, struct page *page) if (PageWriteback(newpage)) end_page_writeback(newpage); + /* + * PG_readahead shares the same bit with PG_reclaim. The above + * end_page_writeback() may clear PG_readahead mistakenly, so set the + * bit after that. + */ + if (PageReadahead(page)) + SetPageReadahead(newpage); + copy_page_owner(page, newpage); mem_cgroup_migrate(page, newpage); @@ -1211,7 +1223,7 @@ out: */ if (likely(!__PageMovable(page))) mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_cache(page), -hpage_nr_pages(page)); + page_is_file_lru(page), -hpage_nr_pages(page)); } /* @@ -1282,6 +1294,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, int page_was_mapped = 0; struct page *new_hpage; struct anon_vma *anon_vma = NULL; + struct address_space *mapping = NULL; /* * Migratability of hugepages depends on architectures and their size. @@ -1329,18 +1342,36 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, goto put_anon; if (page_mapped(hpage)) { + /* + * try_to_unmap could potentially call huge_pmd_unshare. + * Because of this, take semaphore in write mode here and + * set TTU_RMAP_LOCKED to let lower levels know we have + * taken the lock. + */ + mapping = hugetlb_page_mapping_lock_write(hpage); + if (unlikely(!mapping)) + goto unlock_put_anon; + try_to_unmap(hpage, - TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); + TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS| + TTU_RMAP_LOCKED); page_was_mapped = 1; + /* + * Leave mapping locked until after subsequent call to + * remove_migration_ptes() + */ } if (!page_mapped(hpage)) rc = move_to_new_page(new_hpage, hpage, mode); - if (page_was_mapped) + if (page_was_mapped) { remove_migration_ptes(hpage, - rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false); + rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, true); + i_mmap_unlock_write(mapping); + } +unlock_put_anon: unlock_page(new_hpage); put_anon: @@ -1499,9 +1530,6 @@ static int do_move_pages_to_node(struct mm_struct *mm, { int err; - if (list_empty(pagelist)) - return 0; - err = migrate_pages(pagelist, alloc_new_node_page, NULL, node, MIGRATE_SYNC, MR_SYSCALL); if (err) @@ -1568,7 +1596,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, err = 1; list_add_tail(&head->lru, pagelist); mod_node_page_state(page_pgdat(head), - NR_ISOLATED_ANON + page_is_file_cache(head), + NR_ISOLATED_ANON + page_is_file_lru(head), hpage_nr_pages(head)); } out_putpage: @@ -1583,6 +1611,32 @@ out: return err; } +static int move_pages_and_store_status(struct mm_struct *mm, int node, + struct list_head *pagelist, int __user *status, + int start, int i, unsigned long nr_pages) +{ + int err; + + if (list_empty(pagelist)) + return 0; + + err = do_move_pages_to_node(mm, pagelist, node); + if (err) { + /* + * Positive err means the number of failed + * pages to migrate. Since we are going to + * abort and return the number of non-migrated + * pages, so need to incude the rest of the + * nr_pages that have not been attempted as + * well. + */ + if (err > 0) + err += nr_pages - i - 1; + return err; + } + return store_status(status, start, node, i - start); +} + /* * Migrate an array of page address onto an array of nodes and fill * the corresponding array of status. @@ -1626,21 +1680,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, current_node = node; start = i; } else if (node != current_node) { - err = do_move_pages_to_node(mm, &pagelist, current_node); - if (err) { - /* - * Positive err means the number of failed - * pages to migrate. Since we are going to - * abort and return the number of non-migrated - * pages, so need to incude the rest of the - * nr_pages that have not been attempted as - * well. - */ - if (err > 0) - err += nr_pages - i - 1; - goto out; - } - err = store_status(status, start, current_node, i - start); + err = move_pages_and_store_status(mm, current_node, + &pagelist, status, start, i, nr_pages); if (err) goto out; start = i; @@ -1654,49 +1695,29 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, err = add_page_for_migration(mm, addr, current_node, &pagelist, flags & MPOL_MF_MOVE_ALL); - if (!err) { - /* The page is already on the target node */ - err = store_status(status, i, current_node, 1); - if (err) - goto out_flush; - continue; - } else if (err > 0) { + if (err > 0) { /* The page is successfully queued for migration */ continue; } - err = store_status(status, i, err, 1); + /* + * If the page is already on the target node (!err), store the + * node, otherwise, store the err. + */ + err = store_status(status, i, err ? : current_node, 1); if (err) goto out_flush; - err = do_move_pages_to_node(mm, &pagelist, current_node); - if (err) { - if (err > 0) - err += nr_pages - i - 1; + err = move_pages_and_store_status(mm, current_node, &pagelist, + status, start, i, nr_pages); + if (err) goto out; - } - if (i > start) { - err = store_status(status, start, current_node, i - start); - if (err) - goto out; - } current_node = NUMA_NO_NODE; } out_flush: - if (list_empty(&pagelist)) - return err; - /* Make sure we do not overwrite the existing error */ - err1 = do_move_pages_to_node(mm, &pagelist, current_node); - /* - * Don't have to report non-attempted pages here since: - * - If the above loop is done gracefully all pages have been - * attempted. - * - If the above loop is aborted it means a fatal error - * happened, should return ret. - */ - if (!err1) - err1 = store_status(status, start, current_node, i - start); + err1 = move_pages_and_store_status(mm, current_node, &pagelist, + status, start, i, nr_pages); if (err >= 0) err = err1; out: @@ -1938,7 +1959,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) return 0; } - page_lru = page_is_file_cache(page); + page_lru = page_is_file_lru(page); mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru, hpage_nr_pages(page)); @@ -1974,7 +1995,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, * Don't migrate file pages that are mapped in multiple processes * with execute permissions as they are probably shared libraries. */ - if (page_mapcount(page) != 1 && page_is_file_cache(page) && + if (page_mapcount(page) != 1 && page_is_file_lru(page) && (vma->vm_flags & VM_EXEC)) goto out; @@ -1982,7 +2003,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, * Also do not migrate dirty pages as not all filesystems can move * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles. */ - if (page_is_file_cache(page) && PageDirty(page)) + if (page_is_file_lru(page) && PageDirty(page)) goto out; isolated = numamigrate_isolate_page(pgdat, page); @@ -1997,7 +2018,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, if (!list_empty(&migratepages)) { list_del(&page->lru); dec_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + page_is_file_lru(page)); putback_lru_page(page); } isolated = 0; @@ -2027,7 +2048,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, pg_data_t *pgdat = NODE_DATA(node); int isolated = 0; struct page *new_page = NULL; - int page_lru = page_is_file_cache(page); + int page_lru = page_is_file_lru(page); unsigned long start = address & HPAGE_PMD_MASK; new_page = alloc_pages_node(node, @@ -2241,7 +2262,7 @@ again: arch_enter_lazy_mmu_mode(); for (; addr < end; addr += PAGE_SIZE, ptep++) { - unsigned long mpfn, pfn; + unsigned long mpfn = 0, pfn; struct page *page; swp_entry_t entry; pte_t pte; @@ -2255,8 +2276,6 @@ again: } if (!pte_present(pte)) { - mpfn = 0; - /* * Only care about unaddressable device page special * page table entry. Other special swap entries are not @@ -2267,11 +2286,16 @@ again: goto next; page = device_private_entry_to_page(entry); + if (page->pgmap->owner != migrate->src_owner) + goto next; + mpfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE; if (is_write_device_private_entry(entry)) mpfn |= MIGRATE_PFN_WRITE; } else { + if (migrate->src_owner) + goto next; pfn = pte_pfn(pte); if (is_zero_pfn(pfn)) { mpfn = MIGRATE_PFN_MIGRATE; @@ -2318,6 +2342,8 @@ again: swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pte)) swp_pte = pte_swp_mksoft_dirty(swp_pte); + if (pte_uffd_wp(pte)) + swp_pte = pte_swp_mkuffd_wp(swp_pte); set_pte_at(mm, addr, ptep, swp_pte); /* |