summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-12-14 17:13:35 -0800
committerJakub Kicinski <kuba@kernel.org>2023-12-14 17:14:41 -0800
commit8f674972d698c1223b6b4f374df5dde835b88c34 (patch)
treee6ec59ecb3b42b333416dfc10e738689199811f4 /mm
parent1b666016d0ad4a879dcd3d9188635ad68c4b16ce (diff)
parentc7402612e2e61b76177f22e6e7f705adcbecc6fe (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR. Conflicts: drivers/net/ethernet/intel/iavf/iavf_ethtool.c 3a0b5a2929fd ("iavf: Introduce new state machines for flow director") 95260816b489 ("iavf: use iavf_schedule_aq_request() helper") https://lore.kernel.org/all/84e12519-04dc-bd80-bc34-8cf50d7898ce@intel.com/ drivers/net/ethernet/broadcom/bnxt/bnxt.c c13e268c0768 ("bnxt_en: Fix HWTSTAMP_FILTER_ALL packet timestamp logic") c2f8063309da ("bnxt_en: Refactor RX VLAN acceleration logic.") a7445d69809f ("bnxt_en: Add support for new RX and TPA_START completion types for P7") 1c7fd6ee2fe4 ("bnxt_en: Rename some macros for the P5 chips") https://lore.kernel.org/all/20231211110022.27926ad9@canb.auug.org.au/ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c bd6781c18cb5 ("bnxt_en: Fix wrong return value check in bnxt_close_nic()") 84793a499578 ("bnxt_en: Skip nic close/open when configuring tstamp filters") https://lore.kernel.org/all/20231214113041.3a0c003c@canb.auug.org.au/ drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c 3d7a3f2612d7 ("net/mlx5: Nack sync reset request when HotPlug is enabled") cecf44ea1a1f ("net/mlx5: Allow sync reset flow when BF MGT interface device is present") https://lore.kernel.org/all/20231211110328.76c925af@canb.auug.org.au/ No adjacent changes. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig16
-rw-r--r--mm/damon/core.c1
-rw-r--r--mm/damon/sysfs-schemes.c49
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/hugetlb.c7
-rw-r--r--mm/kmemleak.c40
-rw-r--r--mm/madvise.c11
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/memory.c1
-rw-r--r--mm/memory_hotplug.c15
10 files changed, 103 insertions, 41 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 89971a894b60..57cd378c73d6 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1201,13 +1201,6 @@ config ANON_VMA_NAME
area from being merged with adjacent virtual memory areas due to the
difference in their name.
-config USERFAULTFD
- bool "Enable userfaultfd() system call"
- depends on MMU
- help
- Enable the userfaultfd() system call that allows to intercept and
- handle page faults in userland.
-
config HAVE_ARCH_USERFAULTFD_WP
bool
help
@@ -1218,6 +1211,14 @@ config HAVE_ARCH_USERFAULTFD_MINOR
help
Arch has userfaultfd minor fault support
+menuconfig USERFAULTFD
+ bool "Enable userfaultfd() system call"
+ depends on MMU
+ help
+ Enable the userfaultfd() system call that allows to intercept and
+ handle page faults in userland.
+
+if USERFAULTFD
config PTE_MARKER_UFFD_WP
bool "Userfaultfd write protection support for shmem/hugetlbfs"
default y
@@ -1227,6 +1228,7 @@ config PTE_MARKER_UFFD_WP
Allows to create marker PTEs for userfaultfd write protection
purposes. It is required to enable userfaultfd write protection on
file-backed memory types like shmem and hugetlbfs.
+endif # USERFAULTFD
# multi-gen LRU {
config LRU_GEN
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 6262d55904e7..ce1562783e7e 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1225,6 +1225,7 @@ static void damon_split_region_at(struct damon_target *t,
new->age = r->age;
new->last_nr_accesses = r->last_nr_accesses;
new->nr_accesses_bp = r->nr_accesses_bp;
+ new->nr_accesses = r->nr_accesses;
damon_insert_region(new, r, damon_next_region(r), t);
}
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index be667236b8e6..fe0fe2562000 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -139,6 +139,13 @@ static const struct kobj_type damon_sysfs_scheme_region_ktype = {
* damon_sysfs_before_damos_apply() understands the situation by showing the
* 'finished' status and do nothing.
*
+ * If DAMOS is not applied to any region due to any reasons including the
+ * access pattern, the watermarks, the quotas, and the filters,
+ * ->before_damos_apply() will not be called back. Until the situation is
+ * changed, the update will not be finished. To avoid this,
+ * damon_sysfs_after_sampling() set the status as 'finished' if more than two
+ * apply intervals of the scheme is passed while the state is 'idle'.
+ *
* Finally, the tried regions request handling finisher function
* (damon_sysfs_schemes_update_regions_stop()) unregisters the callbacks.
*/
@@ -154,6 +161,7 @@ struct damon_sysfs_scheme_regions {
int nr_regions;
unsigned long total_bytes;
enum damos_sysfs_regions_upd_status upd_status;
+ unsigned long upd_timeout_jiffies;
};
static struct damon_sysfs_scheme_regions *
@@ -1854,7 +1862,9 @@ static int damon_sysfs_after_sampling(struct damon_ctx *ctx)
for (i = 0; i < sysfs_schemes->nr; i++) {
sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions;
if (sysfs_regions->upd_status ==
- DAMOS_TRIED_REGIONS_UPD_STARTED)
+ DAMOS_TRIED_REGIONS_UPD_STARTED ||
+ time_after(jiffies,
+ sysfs_regions->upd_timeout_jiffies))
sysfs_regions->upd_status =
DAMOS_TRIED_REGIONS_UPD_FINISHED;
}
@@ -1885,14 +1895,41 @@ int damon_sysfs_schemes_clear_regions(
return 0;
}
+static struct damos *damos_sysfs_nth_scheme(int n, struct damon_ctx *ctx)
+{
+ struct damos *scheme;
+ int i = 0;
+
+ damon_for_each_scheme(scheme, ctx) {
+ if (i == n)
+ return scheme;
+ i++;
+ }
+ return NULL;
+}
+
static void damos_tried_regions_init_upd_status(
- struct damon_sysfs_schemes *sysfs_schemes)
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx)
{
int i;
+ struct damos *scheme;
+ struct damon_sysfs_scheme_regions *sysfs_regions;
- for (i = 0; i < sysfs_schemes->nr; i++)
- sysfs_schemes->schemes_arr[i]->tried_regions->upd_status =
- DAMOS_TRIED_REGIONS_UPD_IDLE;
+ for (i = 0; i < sysfs_schemes->nr; i++) {
+ sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions;
+ scheme = damos_sysfs_nth_scheme(i, ctx);
+ if (!scheme) {
+ sysfs_regions->upd_status =
+ DAMOS_TRIED_REGIONS_UPD_FINISHED;
+ continue;
+ }
+ sysfs_regions->upd_status = DAMOS_TRIED_REGIONS_UPD_IDLE;
+ sysfs_regions->upd_timeout_jiffies = jiffies +
+ 2 * usecs_to_jiffies(scheme->apply_interval_us ?
+ scheme->apply_interval_us :
+ ctx->attrs.sample_interval);
+ }
}
/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
@@ -1902,7 +1939,7 @@ int damon_sysfs_schemes_update_regions_start(
{
damon_sysfs_schemes_clear_regions(sysfs_schemes, ctx);
damon_sysfs_schemes_for_damos_callback = sysfs_schemes;
- damos_tried_regions_init_upd_status(sysfs_schemes);
+ damos_tried_regions_init_upd_status(sysfs_schemes, ctx);
damos_regions_upd_total_bytes_only = total_bytes_only;
ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply;
ctx->callback.after_sampling = damon_sysfs_after_sampling;
diff --git a/mm/filemap.c b/mm/filemap.c
index 32eedf3afd45..f1c8c278310f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3371,7 +3371,7 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio,
}
}
- if (pmd_none(*vmf->pmd))
+ if (pmd_none(*vmf->pmd) && vmf->prealloc_pte)
pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);
return false;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1169ef2f2176..6feb3e0630d1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1182,6 +1182,13 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
return (get_vma_private_data(vma) & flag) != 0;
}
+bool __vma_private_lock(struct vm_area_struct *vma)
+{
+ return !(vma->vm_flags & VM_MAYSHARE) &&
+ get_vma_private_data(vma) & ~HPAGE_RESV_MASK &&
+ is_vma_resv_set(vma, HPAGE_RESV_OWNER);
+}
+
void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 1eacca03bedd..5501363d6b31 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -642,32 +642,16 @@ static struct kmemleak_object *__alloc_object(gfp_t gfp)
if (!object) {
pr_warn("Cannot allocate a kmemleak_object structure\n");
kmemleak_disable();
+ return NULL;
}
- return object;
-}
-
-static int __link_object(struct kmemleak_object *object, unsigned long ptr,
- size_t size, int min_count, bool is_phys)
-{
-
- struct kmemleak_object *parent;
- struct rb_node **link, *rb_parent;
- unsigned long untagged_ptr;
- unsigned long untagged_objp;
-
INIT_LIST_HEAD(&object->object_list);
INIT_LIST_HEAD(&object->gray_list);
INIT_HLIST_HEAD(&object->area_list);
raw_spin_lock_init(&object->lock);
atomic_set(&object->use_count, 1);
- object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0);
- object->pointer = ptr;
- object->size = kfence_ksize((void *)ptr) ?: size;
object->excess_ref = 0;
- object->min_count = min_count;
object->count = 0; /* white color initially */
- object->jiffies = jiffies;
object->checksum = 0;
object->del_state = 0;
@@ -692,6 +676,24 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
/* kernel backtrace */
object->trace_handle = set_track_prepare();
+ return object;
+}
+
+static int __link_object(struct kmemleak_object *object, unsigned long ptr,
+ size_t size, int min_count, bool is_phys)
+{
+
+ struct kmemleak_object *parent;
+ struct rb_node **link, *rb_parent;
+ unsigned long untagged_ptr;
+ unsigned long untagged_objp;
+
+ object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0);
+ object->pointer = ptr;
+ object->size = kfence_ksize((void *)ptr) ?: size;
+ object->min_count = min_count;
+ object->jiffies = jiffies;
+
untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
/*
* Only update min_addr and max_addr with object
@@ -1150,6 +1152,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
void __ref kmemleak_update_trace(const void *ptr)
{
struct kmemleak_object *object;
+ depot_stack_handle_t trace_handle;
unsigned long flags;
pr_debug("%s(0x%px)\n", __func__, ptr);
@@ -1166,8 +1169,9 @@ void __ref kmemleak_update_trace(const void *ptr)
return;
}
+ trace_handle = set_track_prepare();
raw_spin_lock_irqsave(&object->lock, flags);
- object->trace_handle = set_track_prepare();
+ object->trace_handle = trace_handle;
raw_spin_unlock_irqrestore(&object->lock, flags);
put_object(object);
diff --git a/mm/madvise.c b/mm/madvise.c
index cf4d694280e9..6214a1ab5654 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -335,6 +335,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
struct folio *folio = NULL;
LIST_HEAD(folio_list);
bool pageout_anon_only_filter;
+ unsigned int batch_count = 0;
if (fatal_signal_pending(current))
return -EINTR;
@@ -416,6 +417,7 @@ huge_unlock:
regular_folio:
#endif
tlb_change_page_size(tlb, PAGE_SIZE);
+restart:
start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (!start_pte)
return 0;
@@ -424,6 +426,15 @@ regular_folio:
for (; addr < end; pte++, addr += PAGE_SIZE) {
ptent = ptep_get(pte);
+ if (++batch_count == SWAP_CLUSTER_MAX) {
+ batch_count = 0;
+ if (need_resched()) {
+ pte_unmap_unlock(start_pte, ptl);
+ cond_resched();
+ goto restart;
+ }
+ }
+
if (pte_none(ptent))
continue;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1c1061df9cd1..b226090fd906 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3166,6 +3166,7 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
return NULL;
from_memcg:
+ objcg = NULL;
for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
/*
* Memcg pointer is protected by scope (see set_active_memcg())
@@ -3176,7 +3177,6 @@ from_memcg:
objcg = rcu_dereference_check(memcg->objcg, 1);
if (likely(objcg))
break;
- objcg = NULL;
}
return objcg;
diff --git a/mm/memory.c b/mm/memory.c
index 1f18ed4a5497..5c757fba8858 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1517,6 +1517,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
continue;
} else {
/* We should have covered all the swap entry types */
+ pr_alert("unrecognized swap entry 0x%lx\n", entry.val);
WARN_ON_ONCE(1);
}
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ab41a511e20a..7a5fc89a8652 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1129,6 +1129,9 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
}
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group)
{
@@ -1149,7 +1152,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
!IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
return -EINVAL;
- mem_hotplug_begin();
/* associate pfn range with the zone */
move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
@@ -1208,7 +1210,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
writeback_set_ratelimit();
memory_notify(MEM_ONLINE, &arg);
- mem_hotplug_done();
return 0;
failed_addition:
@@ -1217,7 +1218,6 @@ failed_addition:
(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_ONLINE, &arg);
remove_pfn_range_from_zone(zone, pfn, nr_pages);
- mem_hotplug_done();
return ret;
}
@@ -1458,7 +1458,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
/* create memory block devices after memory was added */
ret = create_memory_block_devices(start, size, params.altmap, group);
if (ret) {
- arch_remove_memory(start, size, NULL);
+ arch_remove_memory(start, size, params.altmap);
goto error_free;
}
@@ -1863,6 +1863,9 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
return 0;
}
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group)
{
@@ -1885,8 +1888,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
!IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION)))
return -EINVAL;
- mem_hotplug_begin();
-
/*
* Don't allow to offline memory blocks that contain holes.
* Consequently, memory blocks with holes can never get onlined
@@ -2031,7 +2032,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
memory_notify(MEM_OFFLINE, &arg);
remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
- mem_hotplug_done();
return 0;
failed_removal_isolated:
@@ -2046,7 +2046,6 @@ failed_removal:
(unsigned long long) start_pfn << PAGE_SHIFT,
((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
reason);
- mem_hotplug_done();
return ret;
}