author	Jakub Kicinski <kuba@kernel.org>	2023-12-14 17:13:35 -0800
committer	Jakub Kicinski <kuba@kernel.org>	2023-12-14 17:14:41 -0800
commit	8f674972d698c1223b6b4f374df5dde835b88c34 (patch)
tree	e6ec59ecb3b42b333416dfc10e738689199811f4 /mm
parent	1b666016d0ad4a879dcd3d9188635ad68c4b16ce (diff)
parent	c7402612e2e61b76177f22e6e7f705adcbecc6fe (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:

drivers/net/ethernet/intel/iavf/iavf_ethtool.c
  3a0b5a2929fd ("iavf: Introduce new state machines for flow director")
  95260816b489 ("iavf: use iavf_schedule_aq_request() helper")
https://lore.kernel.org/all/84e12519-04dc-bd80-bc34-8cf50d7898ce@intel.com/

drivers/net/ethernet/broadcom/bnxt/bnxt.c
  c13e268c0768 ("bnxt_en: Fix HWTSTAMP_FILTER_ALL packet timestamp logic")
  c2f8063309da ("bnxt_en: Refactor RX VLAN acceleration logic.")
  a7445d69809f ("bnxt_en: Add support for new RX and TPA_START completion types for P7")
  1c7fd6ee2fe4 ("bnxt_en: Rename some macros for the P5 chips")
https://lore.kernel.org/all/20231211110022.27926ad9@canb.auug.org.au/

drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
  bd6781c18cb5 ("bnxt_en: Fix wrong return value check in bnxt_close_nic()")
  84793a499578 ("bnxt_en: Skip nic close/open when configuring tstamp filters")
https://lore.kernel.org/all/20231214113041.3a0c003c@canb.auug.org.au/

drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
  3d7a3f2612d7 ("net/mlx5: Nack sync reset request when HotPlug is enabled")
  cecf44ea1a1f ("net/mlx5: Allow sync reset flow when BF MGT interface device is present")
https://lore.kernel.org/all/20231211110328.76c925af@canb.auug.org.au/
No adjacent changes.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/Kconfig	16
-rw-r--r--	mm/damon/core.c	1
-rw-r--r--	mm/damon/sysfs-schemes.c	49
-rw-r--r--	mm/filemap.c	2
-rw-r--r--	mm/hugetlb.c	7
-rw-r--r--	mm/kmemleak.c	40
-rw-r--r--	mm/madvise.c	11
-rw-r--r--	mm/memcontrol.c	2
-rw-r--r--	mm/memory.c	1
-rw-r--r--	mm/memory_hotplug.c	15
10 files changed, 103 insertions(+), 41 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 89971a894b60..57cd378c73d6 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1201,13 +1201,6 @@ config ANON_VMA_NAME
 	  area from being merged with adjacent virtual memory areas due to the
 	  difference in their name.
 
-config USERFAULTFD
-	bool "Enable userfaultfd() system call"
-	depends on MMU
-	help
-	  Enable the userfaultfd() system call that allows to intercept and
-	  handle page faults in userland.
-
 config HAVE_ARCH_USERFAULTFD_WP
 	bool
 	help
@@ -1218,6 +1211,14 @@ config HAVE_ARCH_USERFAULTFD_MINOR
 	help
 	  Arch has userfaultfd minor fault support
 
+menuconfig USERFAULTFD
+	bool "Enable userfaultfd() system call"
+	depends on MMU
+	help
+	  Enable the userfaultfd() system call that allows to intercept and
+	  handle page faults in userland.
+
+if USERFAULTFD
 config PTE_MARKER_UFFD_WP
 	bool "Userfaultfd write protection support for shmem/hugetlbfs"
 	default y
@@ -1227,6 +1228,7 @@ config PTE_MARKER_UFFD_WP
 	  Allows to create marker PTEs for userfaultfd write protection
 	  purposes. It is required to enable userfaultfd write protection on
 	  file-backed memory types like shmem and hugetlbfs.
+endif # USERFAULTFD
 
 # multi-gen LRU {
 config LRU_GEN
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 6262d55904e7..ce1562783e7e 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1225,6 +1225,7 @@ static void damon_split_region_at(struct damon_target *t,
 	new->age = r->age;
 	new->last_nr_accesses = r->last_nr_accesses;
 	new->nr_accesses_bp = r->nr_accesses_bp;
+	new->nr_accesses = r->nr_accesses;
 
 	damon_insert_region(new, r, damon_next_region(r), t);
 }
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index be667236b8e6..fe0fe2562000 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -139,6 +139,13 @@ static const struct kobj_type damon_sysfs_scheme_region_ktype = {
 * damon_sysfs_before_damos_apply() understands the situation by showing the
 * 'finished' status and do nothing.
 *
+ * If DAMOS is not applied to any region due to any reasons including the
+ * access pattern, the watermarks, the quotas, and the filters,
+ * ->before_damos_apply() will not be called back. Until the situation is
+ * changed, the update will not be finished. To avoid this,
+ * damon_sysfs_after_sampling() set the status as 'finished' if more than two
+ * apply intervals of the scheme is passed while the state is 'idle'.
+ *
 * Finally, the tried regions request handling finisher function
 * (damon_sysfs_schemes_update_regions_stop()) unregisters the callbacks.
 */
@@ -154,6 +161,7 @@ struct damon_sysfs_scheme_regions {
 	int nr_regions;
 	unsigned long total_bytes;
 	enum damos_sysfs_regions_upd_status upd_status;
+	unsigned long upd_timeout_jiffies;
 };
 
 static struct damon_sysfs_scheme_regions *
@@ -1854,7 +1862,9 @@ static int damon_sysfs_after_sampling(struct damon_ctx *ctx)
 	for (i = 0; i < sysfs_schemes->nr; i++) {
 		sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions;
 		if (sysfs_regions->upd_status ==
-				DAMOS_TRIED_REGIONS_UPD_STARTED)
+				DAMOS_TRIED_REGIONS_UPD_STARTED ||
+				time_after(jiffies,
+					sysfs_regions->upd_timeout_jiffies))
 			sysfs_regions->upd_status =
 				DAMOS_TRIED_REGIONS_UPD_FINISHED;
 	}
@@ -1885,14 +1895,41 @@ int damon_sysfs_schemes_clear_regions(
 	return 0;
 }
 
+static struct damos *damos_sysfs_nth_scheme(int n, struct damon_ctx *ctx)
+{
+	struct damos *scheme;
+	int i = 0;
+
+	damon_for_each_scheme(scheme, ctx) {
+		if (i == n)
+			return scheme;
+		i++;
+	}
+	return NULL;
+}
+
 static void damos_tried_regions_init_upd_status(
-		struct damon_sysfs_schemes *sysfs_schemes)
+		struct damon_sysfs_schemes *sysfs_schemes,
+		struct damon_ctx *ctx)
 {
 	int i;
+	struct damos *scheme;
+	struct damon_sysfs_scheme_regions *sysfs_regions;
 
-	for (i = 0; i < sysfs_schemes->nr; i++)
-		sysfs_schemes->schemes_arr[i]->tried_regions->upd_status =
-			DAMOS_TRIED_REGIONS_UPD_IDLE;
+	for (i = 0; i < sysfs_schemes->nr; i++) {
+		sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions;
+		scheme = damos_sysfs_nth_scheme(i, ctx);
+		if (!scheme) {
+			sysfs_regions->upd_status =
+				DAMOS_TRIED_REGIONS_UPD_FINISHED;
+			continue;
+		}
+		sysfs_regions->upd_status = DAMOS_TRIED_REGIONS_UPD_IDLE;
+		sysfs_regions->upd_timeout_jiffies = jiffies +
+			2 * usecs_to_jiffies(scheme->apply_interval_us ?
+					scheme->apply_interval_us :
+					ctx->attrs.sample_interval);
+	}
 }
 
 /* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
@@ -1902,7 +1939,7 @@ int damon_sysfs_schemes_update_regions_start(
 {
 	damon_sysfs_schemes_clear_regions(sysfs_schemes, ctx);
 	damon_sysfs_schemes_for_damos_callback = sysfs_schemes;
-	damos_tried_regions_init_upd_status(sysfs_schemes);
+	damos_tried_regions_init_upd_status(sysfs_schemes, ctx);
 	damos_regions_upd_total_bytes_only = total_bytes_only;
 	ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply;
 	ctx->callback.after_sampling = damon_sysfs_after_sampling;
diff --git a/mm/filemap.c b/mm/filemap.c
index 32eedf3afd45..f1c8c278310f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3371,7 +3371,7 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio,
 		}
 	}
 
-	if (pmd_none(*vmf->pmd))
+	if (pmd_none(*vmf->pmd) && vmf->prealloc_pte)
 		pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);
 
 	return false;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1169ef2f2176..6feb3e0630d1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1182,6 +1182,13 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
 	return (get_vma_private_data(vma) & flag) != 0;
 }
 
+bool __vma_private_lock(struct vm_area_struct *vma)
+{
+	return !(vma->vm_flags & VM_MAYSHARE) &&
+		get_vma_private_data(vma) & ~HPAGE_RESV_MASK &&
+		is_vma_resv_set(vma, HPAGE_RESV_OWNER);
+}
+
 void hugetlb_dup_vma_private(struct vm_area_struct *vma)
 {
 	VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 1eacca03bedd..5501363d6b31 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -642,32 +642,16 @@ static struct kmemleak_object *__alloc_object(gfp_t gfp)
 	if (!object) {
 		pr_warn("Cannot allocate a kmemleak_object structure\n");
 		kmemleak_disable();
+		return NULL;
 	}
 
-	return object;
-}
-
-static int __link_object(struct kmemleak_object *object, unsigned long ptr,
-			 size_t size, int min_count, bool is_phys)
-{
-
-	struct kmemleak_object *parent;
-	struct rb_node **link, *rb_parent;
-	unsigned long untagged_ptr;
-	unsigned long untagged_objp;
-
 	INIT_LIST_HEAD(&object->object_list);
 	INIT_LIST_HEAD(&object->gray_list);
 	INIT_HLIST_HEAD(&object->area_list);
 	raw_spin_lock_init(&object->lock);
 	atomic_set(&object->use_count, 1);
-	object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0);
-	object->pointer = ptr;
-	object->size = kfence_ksize((void *)ptr) ?: size;
 	object->excess_ref = 0;
-	object->min_count = min_count;
 	object->count = 0;			/* white color initially */
-	object->jiffies = jiffies;
 	object->checksum = 0;
 	object->del_state = 0;
 
@@ -692,6 +676,24 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
 	/* kernel backtrace */
 	object->trace_handle = set_track_prepare();
 
+	return object;
+}
+
+static int __link_object(struct kmemleak_object *object, unsigned long ptr,
+			 size_t size, int min_count, bool is_phys)
+{
+
+	struct kmemleak_object *parent;
+	struct rb_node **link, *rb_parent;
+	unsigned long untagged_ptr;
+	unsigned long untagged_objp;
+
+	object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0);
+	object->pointer = ptr;
+	object->size = kfence_ksize((void *)ptr) ?: size;
+	object->min_count = min_count;
+	object->jiffies = jiffies;
+
 	untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
 	/*
 	 * Only update min_addr and max_addr with object
@@ -1150,6 +1152,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
 void __ref kmemleak_update_trace(const void *ptr)
 {
 	struct kmemleak_object *object;
+	depot_stack_handle_t trace_handle;
 	unsigned long flags;
 
 	pr_debug("%s(0x%px)\n", __func__, ptr);
@@ -1166,8 +1169,9 @@ void __ref kmemleak_update_trace(const void *ptr)
 		return;
 	}
 
+	trace_handle = set_track_prepare();
 	raw_spin_lock_irqsave(&object->lock, flags);
-	object->trace_handle = set_track_prepare();
+	object->trace_handle = trace_handle;
 	raw_spin_unlock_irqrestore(&object->lock, flags);
 
 	put_object(object);
diff --git a/mm/madvise.c b/mm/madvise.c
index cf4d694280e9..6214a1ab5654 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -335,6 +335,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 	struct folio *folio = NULL;
 	LIST_HEAD(folio_list);
 	bool pageout_anon_only_filter;
+	unsigned int batch_count = 0;
 
 	if (fatal_signal_pending(current))
 		return -EINTR;
@@ -416,6 +417,7 @@ huge_unlock:
 regular_folio:
 #endif
 	tlb_change_page_size(tlb, PAGE_SIZE);
+restart:
 	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (!start_pte)
 		return 0;
@@ -424,6 +426,15 @@ regular_folio:
 	for (; addr < end; pte++, addr += PAGE_SIZE) {
 		ptent = ptep_get(pte);
 
+		if (++batch_count == SWAP_CLUSTER_MAX) {
+			batch_count = 0;
+			if (need_resched()) {
+				pte_unmap_unlock(start_pte, ptl);
+				cond_resched();
+				goto restart;
+			}
+		}
+
 		if (pte_none(ptent))
 			continue;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1c1061df9cd1..b226090fd906 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3166,6 +3166,7 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 		return NULL;
 
 from_memcg:
+	objcg = NULL;
 	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
 		/*
 		 * Memcg pointer is protected by scope (see set_active_memcg())
@@ -3176,7 +3177,6 @@ from_memcg:
 		objcg = rcu_dereference_check(memcg->objcg, 1);
 		if (likely(objcg))
 			break;
-		objcg = NULL;
 	}
 
 	return objcg;
diff --git a/mm/memory.c b/mm/memory.c
index 1f18ed4a5497..5c757fba8858 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1517,6 +1517,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			continue;
 		} else {
 			/* We should have covered all the swap entry types */
+			pr_alert("unrecognized swap entry 0x%lx\n", entry.val);
 			WARN_ON_ONCE(1);
 		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ab41a511e20a..7a5fc89a8652 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1129,6 +1129,9 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
 	kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
 }
 
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
 int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 		       struct zone *zone, struct memory_group *group)
 {
@@ -1149,7 +1152,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 			 !IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
 		return -EINVAL;
 
-	mem_hotplug_begin();
 
 	/* associate pfn range with the zone */
 	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
@@ -1208,7 +1210,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
 	writeback_set_ratelimit();
 
 	memory_notify(MEM_ONLINE, &arg);
-	mem_hotplug_done();
 	return 0;
 
 failed_addition:
@@ -1217,7 +1218,6 @@ failed_addition:
 		 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
 	memory_notify(MEM_CANCEL_ONLINE, &arg);
 	remove_pfn_range_from_zone(zone, pfn, nr_pages);
-	mem_hotplug_done();
 	return ret;
 }
 
@@ -1458,7 +1458,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	/* create memory block devices after memory was added */
 	ret = create_memory_block_devices(start, size, params.altmap, group);
 	if (ret) {
-		arch_remove_memory(start, size, NULL);
+		arch_remove_memory(start, size, params.altmap);
 		goto error_free;
 	}
 
@@ -1863,6 +1863,9 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
 	return 0;
 }
 
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
 int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
 			struct zone *zone, struct memory_group *group)
 {
@@ -1885,8 +1888,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
 			 !IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION)))
 		return -EINVAL;
 
-	mem_hotplug_begin();
-
 	/*
 	 * Don't allow to offline memory blocks that contain holes.
 	 * Consequently, memory blocks with holes can never get onlined
@@ -2031,7 +2032,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
 
 	memory_notify(MEM_OFFLINE, &arg);
 	remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
-	mem_hotplug_done();
 	return 0;
 
 failed_removal_isolated:
@@ -2046,7 +2046,6 @@ failed_removal:
 		 (unsigned long long) start_pfn << PAGE_SHIFT,
 		 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
 		 reason);
-	mem_hotplug_done();
 	return ret;
 }
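
One detail of the kmemleak hunks above is worth calling out: kmemleak_update_trace() now runs set_track_prepare(), which may allocate stack-depot storage, before taking the object's raw spinlock, and only publishes the resulting handle inside the critical section. What follows is a minimal userspace C sketch of that "prepare outside the lock, publish under it" idiom; struct record, prepare_trace(), and update_trace() are illustrative stand-ins, not the kernel's actual API:

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-in for kmemleak_object: a lock plus the field
 * that the lock protects. */
struct record {
	pthread_mutex_t lock;
	unsigned long trace_handle;
};

/* Stand-in for set_track_prepare(): in the kernel this may allocate,
 * which must not happen while a raw spinlock is held. */
static unsigned long prepare_trace(void)
{
	return 0x1234UL;	/* pretend a backtrace was captured */
}

static void update_trace(struct record *r)
{
	/* Step 1: do the potentially allocating work with no lock held. */
	unsigned long handle = prepare_trace();

	/* Step 2: publish the result in a short critical section. */
	pthread_mutex_lock(&r->lock);
	r->trace_handle = handle;
	pthread_mutex_unlock(&r->lock);
}

int main(void)
{
	struct record r = { .lock = PTHREAD_MUTEX_INITIALIZER };

	update_trace(&r);
	printf("trace_handle = %#lx\n", r.trace_handle);
	return 0;
}

The reworked __alloc_object()/__link_object() split in the same diff follows the same ordering: the backtrace handle and list heads are set up while the object is still private to the allocating thread, so only the tree insertion has to happen under kmemleak's lock.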