summaryrefslogtreecommitdiff
path: root/mm/migrate.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--mm/migrate.c270
1 files changed, 168 insertions, 102 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 923ea80ba744..dfdb3a136bf8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -20,7 +20,6 @@
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
-#include <linux/nsproxy.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
@@ -35,21 +34,16 @@
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/hugetlb.h>
-#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/pfn_t.h>
-#include <linux/memremap.h>
-#include <linux/userfaultfd_k.h>
-#include <linux/balloon_compaction.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
#include <linux/ptrace.h>
-#include <linux/oom.h>
#include <linux/memory.h>
-#include <linux/random.h>
#include <linux/sched/sysctl.h>
#include <linux/memory-tiers.h>
+#include <linux/pagewalk.h>
#include <asm/tlbflush.h>
@@ -177,13 +171,83 @@ void putback_movable_pages(struct list_head *l)
}
}
+/* Must be called with an elevated refcount on the non-hugetlb folio */
+bool isolate_folio_to_list(struct folio *folio, struct list_head *list)
+{
+ bool isolated, lru;
+
+ if (folio_test_hugetlb(folio))
+ return isolate_hugetlb(folio, list);
+
+ lru = !__folio_test_movable(folio);
+ if (lru)
+ isolated = folio_isolate_lru(folio);
+ else
+ isolated = isolate_movable_page(&folio->page,
+ ISOLATE_UNEVICTABLE);
+
+ if (!isolated)
+ return false;
+
+ list_add(&folio->lru, list);
+ if (lru)
+ node_stat_add_folio(folio, NR_ISOLATED_ANON +
+ folio_is_file_lru(folio));
+
+ return true;
+}
+
+static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
+ struct folio *folio,
+ unsigned long idx)
+{
+ struct page *page = folio_page(folio, idx);
+ bool contains_data;
+ pte_t newpte;
+ void *addr;
+
+ VM_BUG_ON_PAGE(PageCompound(page), page);
+ VM_BUG_ON_PAGE(!PageAnon(page), page);
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
+
+ if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
+ mm_forbids_zeropage(pvmw->vma->vm_mm))
+ return false;
+
+ /*
+ * The pmd entry mapping the old thp was flushed and the pte mapping
+ * this subpage has been non present. If the subpage is only zero-filled
+ * then map it to the shared zeropage.
+ */
+ addr = kmap_local_page(page);
+ contains_data = memchr_inv(addr, 0, PAGE_SIZE);
+ kunmap_local(addr);
+
+ if (contains_data)
+ return false;
+
+ newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
+ pvmw->vma->vm_page_prot));
+ set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
+
+ dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
+ return true;
+}
+
+struct rmap_walk_arg {
+ struct folio *folio;
+ bool map_unused_to_zeropage;
+};
+
/*
* Restore a potential migration pte to a working pte entry
*/
static bool remove_migration_pte(struct folio *folio,
- struct vm_area_struct *vma, unsigned long addr, void *old)
+ struct vm_area_struct *vma, unsigned long addr, void *arg)
{
- DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+ struct rmap_walk_arg *rmap_walk_arg = arg;
+ DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
while (page_vma_mapped_walk(&pvmw)) {
rmap_t rmap_flags = RMAP_NONE;
@@ -207,6 +271,9 @@ static bool remove_migration_pte(struct folio *folio,
continue;
}
#endif
+ if (rmap_walk_arg->map_unused_to_zeropage &&
+ try_to_map_unused_to_zeropage(&pvmw, folio, idx))
+ continue;
folio_get(folio);
pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
@@ -285,14 +352,21 @@ static bool remove_migration_pte(struct folio *folio,
* Get rid of all migration entries and replace them by
* references to the indicated page.
*/
-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked)
+void remove_migration_ptes(struct folio *src, struct folio *dst, int flags)
{
+ struct rmap_walk_arg rmap_walk_arg = {
+ .folio = src,
+ .map_unused_to_zeropage = flags & RMP_USE_SHARED_ZEROPAGE,
+ };
+
struct rmap_walk_control rwc = {
.rmap_one = remove_migration_pte,
- .arg = src,
+ .arg = &rmap_walk_arg,
};
- if (locked)
+ VM_BUG_ON_FOLIO((flags & RMP_USE_SHARED_ZEROPAGE) && (src != dst), src);
+
+ if (flags & RMP_LOCKED)
rmap_walk_locked(dst, &rwc);
else
rmap_walk(dst, &rwc);
@@ -422,6 +496,8 @@ static int __folio_migrate_mapping(struct address_space *mapping,
/* No turning back from here */
newfolio->index = folio->index;
newfolio->mapping = folio->mapping;
+ if (folio_test_anon(folio) && folio_test_large(folio))
+ mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
if (folio_test_swapbacked(folio))
__folio_set_swapbacked(newfolio);
@@ -446,6 +522,8 @@ static int __folio_migrate_mapping(struct address_space *mapping,
*/
newfolio->index = folio->index;
newfolio->mapping = folio->mapping;
+ if (folio_test_anon(folio) && folio_test_large(folio))
+ mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
folio_ref_add(newfolio, nr); /* add cache reference */
if (folio_test_swapbacked(folio)) {
__folio_set_swapbacked(newfolio);
@@ -585,8 +663,6 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
{
int cpupid;
- if (folio_test_error(folio))
- folio_set_error(newfolio);
if (folio_test_referenced(folio))
folio_set_referenced(newfolio);
if (folio_test_uptodate(folio))
@@ -640,7 +716,8 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
folio_migrate_ksm(newfolio, folio);
/*
* Please do not reorder this without considering how mm/ksm.c's
- * ksm_get_folio() depends upon ksm_migrate_page() and PageSwapCache().
+ * ksm_get_folio() depends upon ksm_migrate_page() and the
+ * swapcache flag.
*/
if (folio_test_swapcache(folio))
folio_clear_swapcache(folio);
@@ -666,6 +743,7 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
folio_set_readahead(newfolio);
folio_copy_owner(newfolio, folio);
+ pgalloc_tag_copy(newfolio, folio);
mem_cgroup_migrate(folio, newfolio);
}
@@ -904,7 +982,7 @@ static int writeout(struct address_space *mapping, struct folio *folio)
* At this point we know that the migration attempt cannot
* be successful.
*/
- remove_migration_ptes(folio, folio, false);
+ remove_migration_ptes(folio, folio, 0);
rc = mapping->a_ops->writepage(&folio->page, &wbc);
@@ -1068,7 +1146,7 @@ static void migrate_folio_undo_src(struct folio *src,
struct list_head *ret)
{
if (page_was_mapped)
- remove_migration_ptes(src, src, false);
+ remove_migration_ptes(src, src, 0);
/* Drop an anon_vma reference if we took one */
if (anon_vma)
put_anon_vma(anon_vma);
@@ -1306,7 +1384,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
lru_add_drain();
if (old_page_state & PAGE_WAS_MAPPED)
- remove_migration_ptes(src, dst, false);
+ remove_migration_ptes(src, dst, 0);
out_unlock_both:
folio_unlock(dst);
@@ -1444,7 +1522,7 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
if (page_was_mapped)
remove_migration_ptes(src,
- rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
+ rc == MIGRATEPAGE_SUCCESS ? dst : src, 0);
unlock_put_anon:
folio_unlock(dst);
@@ -1682,7 +1760,8 @@ static int migrate_pages_batch(struct list_head *from,
* use _deferred_list.
*/
if (nr_pages > 2 &&
- !list_empty(&folio->_deferred_list)) {
+ !list_empty(&folio->_deferred_list) &&
+ folio_test_partially_mapped(folio)) {
if (!try_split_folio(folio, split_folios, mode)) {
nr_failed++;
stats->nr_thp_failed += is_thp;
@@ -2111,76 +2190,66 @@ static int do_move_pages_to_node(struct list_head *pagelist, int node)
return err;
}
+static int __add_folio_for_migration(struct folio *folio, int node,
+ struct list_head *pagelist, bool migrate_all)
+{
+ if (is_zero_folio(folio) || is_huge_zero_folio(folio))
+ return -EFAULT;
+
+ if (folio_is_zone_device(folio))
+ return -ENOENT;
+
+ if (folio_nid(folio) == node)
+ return 0;
+
+ if (folio_likely_mapped_shared(folio) && !migrate_all)
+ return -EACCES;
+
+ if (folio_test_hugetlb(folio)) {
+ if (isolate_hugetlb(folio, pagelist))
+ return 1;
+ } else if (folio_isolate_lru(folio)) {
+ list_add_tail(&folio->lru, pagelist);
+ node_stat_mod_folio(folio,
+ NR_ISOLATED_ANON + folio_is_file_lru(folio),
+ folio_nr_pages(folio));
+ return 1;
+ }
+ return -EBUSY;
+}
+
/*
- * Resolves the given address to a struct page, isolates it from the LRU and
+ * Resolves the given address to a struct folio, isolates it from the LRU and
* puts it to the given pagelist.
* Returns:
- * errno - if the page cannot be found/isolated
+ * errno - if the folio cannot be found/isolated
* 0 - when it doesn't have to be migrated because it is already on the
* target node
* 1 - when it has been queued
*/
-static int add_page_for_migration(struct mm_struct *mm, const void __user *p,
+static int add_folio_for_migration(struct mm_struct *mm, const void __user *p,
int node, struct list_head *pagelist, bool migrate_all)
{
struct vm_area_struct *vma;
- unsigned long addr;
- struct page *page;
+ struct folio_walk fw;
struct folio *folio;
- int err;
+ unsigned long addr;
+ int err = -EFAULT;
mmap_read_lock(mm);
addr = (unsigned long)untagged_addr_remote(mm, p);
- err = -EFAULT;
vma = vma_lookup(mm, addr);
- if (!vma || !vma_migratable(vma))
- goto out;
-
- /* FOLL_DUMP to ignore special (like zero) pages */
- page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
-
- err = PTR_ERR(page);
- if (IS_ERR(page))
- goto out;
-
- err = -ENOENT;
- if (!page)
- goto out;
-
- folio = page_folio(page);
- if (folio_is_zone_device(folio))
- goto out_putfolio;
-
- err = 0;
- if (folio_nid(folio) == node)
- goto out_putfolio;
-
- err = -EACCES;
- if (folio_likely_mapped_shared(folio) && !migrate_all)
- goto out_putfolio;
-
- err = -EBUSY;
- if (folio_test_hugetlb(folio)) {
- if (isolate_hugetlb(folio, pagelist))
- err = 1;
- } else {
- if (!folio_isolate_lru(folio))
- goto out_putfolio;
-
- err = 1;
- list_add_tail(&folio->lru, pagelist);
- node_stat_mod_folio(folio,
- NR_ISOLATED_ANON + folio_is_file_lru(folio),
- folio_nr_pages(folio));
+ if (vma && vma_migratable(vma)) {
+ folio = folio_walk_start(&fw, vma, addr, FW_ZEROPAGE);
+ if (folio) {
+ err = __add_folio_for_migration(folio, node, pagelist,
+ migrate_all);
+ folio_walk_end(&fw, vma);
+ } else {
+ err = -ENOENT;
+ }
}
-out_putfolio:
- /*
- * Either remove the duplicate refcount from folio_isolate_lru()
- * or drop the folio ref if it was not isolated.
- */
- folio_put(folio);
-out:
mmap_read_unlock(mm);
return err;
}
@@ -2274,8 +2343,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
* Errors in the page lookup or isolation are not fatal and we simply
* report them via status
*/
- err = add_page_for_migration(mm, p, current_node, &pagelist,
- flags & MPOL_MF_MOVE_ALL);
+ err = add_folio_for_migration(mm, p, current_node, &pagelist,
+ flags & MPOL_MF_MOVE_ALL);
if (err > 0) {
/* The page is successfully queued for migration */
@@ -2331,28 +2400,26 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
for (i = 0; i < nr_pages; i++) {
unsigned long addr = (unsigned long)(*pages);
struct vm_area_struct *vma;
- struct page *page;
+ struct folio_walk fw;
+ struct folio *folio;
int err = -EFAULT;
vma = vma_lookup(mm, addr);
if (!vma)
goto set_status;
- /* FOLL_DUMP to ignore special (like zero) pages */
- page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
-
- err = PTR_ERR(page);
- if (IS_ERR(page))
- goto set_status;
-
- err = -ENOENT;
- if (!page)
- goto set_status;
-
- if (!is_zone_device_page(page))
- err = page_to_nid(page);
-
- put_page(page);
+ folio = folio_walk_start(&fw, vma, addr, FW_ZEROPAGE);
+ if (folio) {
+ if (is_zero_folio(folio) || is_huge_zero_folio(folio))
+ err = -EFAULT;
+ else if (folio_is_zone_device(folio))
+ err = -ENOENT;
+ else
+ err = folio_nid(folio);
+ folio_walk_end(&fw, vma);
+ } else {
+ err = -ENOENT;
+ }
set_status:
*status = err;
@@ -2432,25 +2499,19 @@ static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
return current->mm;
}
- /* Find the mm_struct */
- rcu_read_lock();
- task = find_task_by_vpid(pid);
+ task = find_get_task_by_vpid(pid);
if (!task) {
- rcu_read_unlock();
return ERR_PTR(-ESRCH);
}
- get_task_struct(task);
/*
* Check if this process has the right to modify the specified
* process. Use the regular "ptrace_may_access()" checks.
*/
if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
- rcu_read_unlock();
mm = ERR_PTR(-EPERM);
goto out;
}
- rcu_read_unlock();
mm = ERR_PTR(security_task_movememory(task));
if (IS_ERR(mm))
@@ -2526,7 +2587,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
if (!zone_watermark_ok(zone, 0,
high_wmark_pages(zone) +
nr_migrate_pages,
- ZONE_MOVABLE, 0))
+ ZONE_MOVABLE, ALLOC_CMA))
continue;
return true;
}
@@ -2627,6 +2688,8 @@ int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma,
int nr_remaining;
unsigned int nr_succeeded;
LIST_HEAD(migratepages);
+ struct mem_cgroup *memcg = get_mem_cgroup_from_folio(folio);
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
list_add(&folio->lru, &migratepages);
nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_folio,
@@ -2636,10 +2699,13 @@ int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma,
putback_movable_pages(&migratepages);
if (nr_succeeded) {
count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded);
- if (!node_is_toptier(folio_nid(folio)) && node_is_toptier(node))
- mod_node_page_state(pgdat, PGPROMOTE_SUCCESS,
- nr_succeeded);
+ count_memcg_events(memcg, NUMA_PAGE_MIGRATE, nr_succeeded);
+ if ((sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
+ && !node_is_toptier(folio_nid(folio))
+ && node_is_toptier(node))
+ mod_lruvec_state(lruvec, PGPROMOTE_SUCCESS, nr_succeeded);
}
+ mem_cgroup_put(memcg);
BUG_ON(!list_empty(&migratepages));
return nr_remaining ? -EAGAIN : 0;
}