summaryrefslogtreecommitdiff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-09 11:18:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-09 11:18:47 -0800
commitfb46e22a9e3863e08aef8815df9f17d0f4b9aede (patch)
tree83e052911fa8d8d90bcf9de2796e17e19040613f /mm/vmscan.c
parentd30e51aa7b1f6fa7dd78d4598d1e4c047fcc3fb9 (diff)
parent5e0a760b44417f7cadd79de2204d6247109558a0 (diff)
Merge tag 'mm-stable-2024-01-08-15-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "Many singleton patches against the MM code. The patch series which are included in this merge do the following: - Peng Zhang has done some mapletree maintenance work in the series 'maple_tree: add mt_free_one() and mt_attr() helpers' 'Some cleanups of maple tree' - In the series 'mm: use memmap_on_memory semantics for dax/kmem' Vishal Verma has altered the interworking between memory-hotplug and dax/kmem so that newly added 'device memory' can more easily have its memmap placed within that newly added memory. - Matthew Wilcox continues folio-related work (including a few fixes) in the patch series 'Add folio_zero_tail() and folio_fill_tail()' 'Make folio_start_writeback return void' 'Fix fault handler's handling of poisoned tail pages' 'Convert aops->error_remove_page to ->error_remove_folio' 'Finish two folio conversions' 'More swap folio conversions' - Kefeng Wang has also contributed folio-related work in the series 'mm: cleanup and use more folio in page fault' - Jim Cromie has improved the kmemleak reporting output in the series 'tweak kmemleak report format'. - In the series 'stackdepot: allow evicting stack traces' Andrey Konovalov permits clients (in this case KASAN) to cause eviction of no longer needed stack traces. - Charan Teja Kalla has fixed some accounting issues in the page allocator's atomic reserve calculations in the series 'mm: page_alloc: fixes for high atomic reserve caluculations'. - Dmitry Rokosov has added to the samples/ directory some sample code for a userspace memcg event listener application. See the series 'samples: introduce cgroup events listeners'. - Some mapletree maintenance work from Liam Howlett in the series 'maple_tree: iterator state changes'. - Nhat Pham has improved zswap's approach to writeback in the series 'workload-specific and memory pressure-driven zswap writeback'. 
- DAMON/DAMOS feature and maintenance work from SeongJae Park in the series 'mm/damon: let users feed and tame/auto-tune DAMOS' 'selftests/damon: add Python-written DAMON functionality tests' 'mm/damon: misc updates for 6.8' - Yosry Ahmed has improved memcg's stats flushing in the series 'mm: memcg: subtree stats flushing and thresholds'. - In the series 'Multi-size THP for anonymous memory' Ryan Roberts has added a runtime opt-in feature to transparent hugepages which improves performance by allocating larger chunks of memory during anonymous page faults. - Matthew Wilcox has also contributed some cleanup and maintenance work against the buffer_head code in the series 'More buffer_head cleanups'. - Suren Baghdasaryan has done work on Andrea Arcangeli's series 'userfaultfd move option'. UFFDIO_MOVE permits userspace heap compaction algorithms to move userspace's pages around rather than UFFDIO_COPY's alloc/copy/free. - Stefan Roesch has developed a 'KSM Advisor', in the series 'mm/ksm: Add ksm advisor'. This is a governor which tunes KSM's scanning aggressiveness in response to userspace's current needs. - Chengming Zhou has optimized zswap's temporary working memory use in the series 'mm/zswap: dstmem reuse optimizations and cleanups'. - Matthew Wilcox has performed some maintenance work on the writeback code, both code and within filesystems. The series is 'Clean up the writeback paths'. - Andrey Konovalov has optimized KASAN's handling of alloc and free stack traces for secondary-level allocators, in the series 'kasan: save mempool stack traces'. - Andrey also performed some KASAN maintenance work in the series 'kasan: assorted clean-ups'. - David Hildenbrand has gone to town on the rmap code. Cleanups, more pte batching, folio conversions and more. See the series 'mm/rmap: interface overhaul'. - Kinsey Ho has contributed some maintenance work on the MGLRU code in the series 'mm/mglru: Kconfig cleanup'. 
- Matthew Wilcox has contributed lruvec page accounting code cleanups in the series 'Remove some lruvec page accounting functions'" * tag 'mm-stable-2024-01-08-15-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (361 commits) mm, treewide: rename MAX_ORDER to MAX_PAGE_ORDER mm, treewide: introduce NR_PAGE_ORDERS selftests/mm: add separate UFFDIO_MOVE test for PMD splitting selftests/mm: skip test if application doesn't has root privileges selftests/mm: conform test to TAP format output selftests: mm: hugepage-mmap: conform to TAP format output selftests/mm: gup_test: conform test to TAP format output mm/selftests: hugepage-mremap: conform test to TAP format output mm/vmstat: move pgdemote_* out of CONFIG_NUMA_BALANCING mm: zsmalloc: return -ENOSPC rather than -EINVAL in zs_malloc while size is too large mm/memcontrol: remove __mod_lruvec_page_state() mm/khugepaged: use a folio more in collapse_file() slub: use a folio in __kmalloc_large_node slub: use folio APIs in free_large_kmalloc() slub: use alloc_pages_node() in alloc_slab_page() mm: remove inc/dec lruvec page state functions mm: ratelimit stat flush from workingset shrinker kasan: stop leaking stack trace handles mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE mm/mglru: add dummy pmd_dirty() ...
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c280
1 files changed, 150 insertions, 130 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bba207f41b14..4f9c854ce6cc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -411,10 +411,10 @@ static int reclaimer_offset(void)
{
BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD);
- BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
- PGSCAN_DIRECT - PGSCAN_KSWAPD);
BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD);
+ BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
+ PGSCAN_DIRECT - PGSCAN_KSWAPD);
BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD);
@@ -977,7 +977,8 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
(unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
&nr_succeeded);
- __count_vm_events(PGDEMOTE_KSWAPD + reclaimer_offset(), nr_succeeded);
+ mod_node_page_state(pgdat, PGDEMOTE_KSWAPD + reclaimer_offset(),
+ nr_succeeded);
return nr_succeeded;
}
@@ -2222,7 +2223,7 @@ static void prepare_scan_control(pg_data_t *pgdat, struct scan_control *sc)
* Flush the memory cgroup stats, so that we read accurate per-memcg
* lruvec stats for heuristics.
*/
- mem_cgroup_flush_stats();
+ mem_cgroup_flush_stats(sc->target_mem_cgroup);
/*
* Determine the scan balance between anon and file LRUs.
@@ -2667,13 +2668,14 @@ static void get_item_key(void *item, int *key)
key[1] = hash >> BLOOM_FILTER_SHIFT;
}
-static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
+static bool test_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq,
+ void *item)
{
int key[2];
unsigned long *filter;
int gen = filter_gen_from_seq(seq);
- filter = READ_ONCE(lruvec->mm_state.filters[gen]);
+ filter = READ_ONCE(mm_state->filters[gen]);
if (!filter)
return true;
@@ -2682,13 +2684,14 @@ static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *it
return test_bit(key[0], filter) && test_bit(key[1], filter);
}
-static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
+static void update_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq,
+ void *item)
{
int key[2];
unsigned long *filter;
int gen = filter_gen_from_seq(seq);
- filter = READ_ONCE(lruvec->mm_state.filters[gen]);
+ filter = READ_ONCE(mm_state->filters[gen]);
if (!filter)
return;
@@ -2700,12 +2703,12 @@ static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *
set_bit(key[1], filter);
}
-static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
+static void reset_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq)
{
unsigned long *filter;
int gen = filter_gen_from_seq(seq);
- filter = lruvec->mm_state.filters[gen];
+ filter = mm_state->filters[gen];
if (filter) {
bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
return;
@@ -2713,13 +2716,15 @@ static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
- WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
+ WRITE_ONCE(mm_state->filters[gen], filter);
}
/******************************************************************************
* mm_struct list
******************************************************************************/
+#ifdef CONFIG_LRU_GEN_WALKS_MMU
+
static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
{
static struct lru_gen_mm_list mm_list = {
@@ -2736,6 +2741,29 @@ static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
return &mm_list;
}
+static struct lru_gen_mm_state *get_mm_state(struct lruvec *lruvec)
+{
+ return &lruvec->mm_state;
+}
+
+static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk)
+{
+ int key;
+ struct mm_struct *mm;
+ struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
+ struct lru_gen_mm_state *mm_state = get_mm_state(walk->lruvec);
+
+ mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list);
+ key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap);
+
+ if (!walk->force_scan && !test_bit(key, &mm->lru_gen.bitmap))
+ return NULL;
+
+ clear_bit(key, &mm->lru_gen.bitmap);
+
+ return mmget_not_zero(mm) ? mm : NULL;
+}
+
void lru_gen_add_mm(struct mm_struct *mm)
{
int nid;
@@ -2751,10 +2779,11 @@ void lru_gen_add_mm(struct mm_struct *mm)
for_each_node_state(nid, N_MEMORY) {
struct lruvec *lruvec = get_lruvec(memcg, nid);
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
/* the first addition since the last iteration */
- if (lruvec->mm_state.tail == &mm_list->fifo)
- lruvec->mm_state.tail = &mm->lru_gen.list;
+ if (mm_state->tail == &mm_list->fifo)
+ mm_state->tail = &mm->lru_gen.list;
}
list_add_tail(&mm->lru_gen.list, &mm_list->fifo);
@@ -2780,14 +2809,15 @@ void lru_gen_del_mm(struct mm_struct *mm)
for_each_node(nid) {
struct lruvec *lruvec = get_lruvec(memcg, nid);
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
/* where the current iteration continues after */
- if (lruvec->mm_state.head == &mm->lru_gen.list)
- lruvec->mm_state.head = lruvec->mm_state.head->prev;
+ if (mm_state->head == &mm->lru_gen.list)
+ mm_state->head = mm_state->head->prev;
/* where the last iteration ended before */
- if (lruvec->mm_state.tail == &mm->lru_gen.list)
- lruvec->mm_state.tail = lruvec->mm_state.tail->next;
+ if (mm_state->tail == &mm->lru_gen.list)
+ mm_state->tail = mm_state->tail->next;
}
list_del_init(&mm->lru_gen.list);
@@ -2830,10 +2860,30 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
}
#endif
+#else /* !CONFIG_LRU_GEN_WALKS_MMU */
+
+static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
+{
+ return NULL;
+}
+
+static struct lru_gen_mm_state *get_mm_state(struct lruvec *lruvec)
+{
+ return NULL;
+}
+
+static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk)
+{
+ return NULL;
+}
+
+#endif
+
static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last)
{
int i;
int hist;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock);
@@ -2841,44 +2891,20 @@ static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
hist = lru_hist_from_seq(walk->max_seq);
for (i = 0; i < NR_MM_STATS; i++) {
- WRITE_ONCE(lruvec->mm_state.stats[hist][i],
- lruvec->mm_state.stats[hist][i] + walk->mm_stats[i]);
+ WRITE_ONCE(mm_state->stats[hist][i],
+ mm_state->stats[hist][i] + walk->mm_stats[i]);
walk->mm_stats[i] = 0;
}
}
if (NR_HIST_GENS > 1 && last) {
- hist = lru_hist_from_seq(lruvec->mm_state.seq + 1);
+ hist = lru_hist_from_seq(mm_state->seq + 1);
for (i = 0; i < NR_MM_STATS; i++)
- WRITE_ONCE(lruvec->mm_state.stats[hist][i], 0);
+ WRITE_ONCE(mm_state->stats[hist][i], 0);
}
}
-static bool should_skip_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
-{
- int type;
- unsigned long size = 0;
- struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
- int key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap);
-
- if (!walk->force_scan && !test_bit(key, &mm->lru_gen.bitmap))
- return true;
-
- clear_bit(key, &mm->lru_gen.bitmap);
-
- for (type = !walk->can_swap; type < ANON_AND_FILE; type++) {
- size += type ? get_mm_counter(mm, MM_FILEPAGES) :
- get_mm_counter(mm, MM_ANONPAGES) +
- get_mm_counter(mm, MM_SHMEMPAGES);
- }
-
- if (size < MIN_LRU_BATCH)
- return true;
-
- return !mmget_not_zero(mm);
-}
-
static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
struct mm_struct **iter)
{
@@ -2887,7 +2913,7 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
struct mm_struct *mm = NULL;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
- struct lru_gen_mm_state *mm_state = &lruvec->mm_state;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
/*
* mm_state->seq is incremented after each iteration of mm_list. There
@@ -2925,11 +2951,7 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
mm_state->tail = mm_state->head->next;
walk->force_scan = true;
}
-
- mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list);
- if (should_skip_mm(mm, walk))
- mm = NULL;
- } while (!mm);
+ } while (!(mm = get_next_mm(walk)));
done:
if (*iter || last)
reset_mm_stats(lruvec, walk, last);
@@ -2937,7 +2959,7 @@ done:
spin_unlock(&mm_list->lock);
if (mm && first)
- reset_bloom_filter(lruvec, walk->max_seq + 1);
+ reset_bloom_filter(mm_state, walk->max_seq + 1);
if (*iter)
mmput_async(*iter);
@@ -2952,7 +2974,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
bool success = false;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
- struct lru_gen_mm_state *mm_state = &lruvec->mm_state;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
spin_lock(&mm_list->lock);
@@ -3248,7 +3270,6 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned
return pfn;
}
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr)
{
unsigned long pfn = pmd_pfn(pmd);
@@ -3266,7 +3287,6 @@ static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned
return pfn;
}
-#endif
static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
struct pglist_data *pgdat, bool can_swap)
@@ -3369,7 +3389,6 @@ restart:
return suitable_to_scan(total, young);
}
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
{
@@ -3447,12 +3466,6 @@ next:
done:
*first = -1;
}
-#else
-static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
- struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
-{
-}
-#endif
static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
struct mm_walk *args)
@@ -3465,6 +3478,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
DECLARE_BITMAP(bitmap, MIN_LRU_BATCH);
unsigned long first = -1;
struct lru_gen_mm_walk *walk = args->private;
+ struct lru_gen_mm_state *mm_state = get_mm_state(walk->lruvec);
VM_WARN_ON_ONCE(pud_leaf(*pud));
@@ -3487,7 +3501,6 @@ restart:
continue;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_trans_huge(val)) {
unsigned long pfn = pmd_pfn(val);
struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
@@ -3506,7 +3519,7 @@ restart:
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
continue;
}
-#endif
+
walk->mm_stats[MM_NONLEAF_TOTAL]++;
if (should_clear_pmd_young()) {
@@ -3516,7 +3529,7 @@ restart:
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
}
- if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
+ if (!walk->force_scan && !test_bloom_filter(mm_state, walk->max_seq, pmd + i))
continue;
walk->mm_stats[MM_NONLEAF_FOUND]++;
@@ -3527,7 +3540,7 @@ restart:
walk->mm_stats[MM_NONLEAF_ADDED]++;
/* carry over to the next generation */
- update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i);
+ update_bloom_filter(mm_state, walk->max_seq + 1, pmd + i);
}
walk_pmd_range_locked(pud, -1, vma, args, bitmap, &first);
@@ -3734,16 +3747,25 @@ next:
return success;
}
-static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
+static bool inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+ bool can_swap, bool force_scan)
{
+ bool success;
int prev, next;
int type, zone;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
restart:
+ if (max_seq < READ_ONCE(lrugen->max_seq))
+ return false;
+
spin_lock_irq(&lruvec->lru_lock);
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
+ success = max_seq == lrugen->max_seq;
+ if (!success)
+ goto unlock;
+
for (type = ANON_AND_FILE - 1; type >= 0; type--) {
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
continue;
@@ -3787,8 +3809,10 @@ restart:
WRITE_ONCE(lrugen->timestamps[next], jiffies);
/* make sure preceding modifications appear */
smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
-
+unlock:
spin_unlock_irq(&lruvec->lru_lock);
+
+ return success;
}
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
@@ -3798,14 +3822,16 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
struct lru_gen_mm_walk *walk;
struct mm_struct *mm = NULL;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
+ if (!mm_state)
+ return inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+
/* see the comment in iterate_mm_list() */
- if (max_seq <= READ_ONCE(lruvec->mm_state.seq)) {
- success = false;
- goto done;
- }
+ if (max_seq <= READ_ONCE(mm_state->seq))
+ return false;
/*
* If the hardware doesn't automatically set the accessed bit, fallback
@@ -3835,8 +3861,10 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
walk_mm(lruvec, mm, walk);
} while (mm);
done:
- if (success)
- inc_max_seq(lruvec, can_swap, force_scan);
+ if (success) {
+ success = inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+ WARN_ON_ONCE(!success);
+ }
return success;
}
@@ -3961,6 +3989,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
struct mem_cgroup *memcg = folio_memcg(folio);
struct pglist_data *pgdat = folio_pgdat(folio);
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
DEFINE_MAX_SEQ(lruvec);
int old_gen, new_gen = lru_gen_from_seq(max_seq);
@@ -4043,8 +4072,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
mem_cgroup_unlock_pages();
/* feedback from rmap walkers to page table walkers */
- if (suitable_to_scan(i, young))
- update_bloom_filter(lruvec, max_seq, pvmw->pmd);
+ if (mm_state && suitable_to_scan(i, young))
+ update_bloom_filter(mm_state, max_seq, pvmw->pmd);
}
/******************************************************************************
@@ -4060,13 +4089,6 @@ enum {
MEMCG_LRU_YOUNG,
};
-#ifdef CONFIG_MEMCG
-
-static int lru_gen_memcg_seg(struct lruvec *lruvec)
-{
- return READ_ONCE(lruvec->lrugen.seg);
-}
-
static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
{
int seg;
@@ -4113,6 +4135,8 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
spin_unlock_irqrestore(&pgdat->memcg_lru.lock, flags);
}
+#ifdef CONFIG_MEMCG
+
void lru_gen_online_memcg(struct mem_cgroup *memcg)
{
int gen;
@@ -4180,18 +4204,11 @@ void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
struct lruvec *lruvec = get_lruvec(memcg, nid);
/* see the comment on MEMCG_NR_GENS */
- if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
+ if (READ_ONCE(lruvec->lrugen.seg) != MEMCG_LRU_HEAD)
lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
}
-#else /* !CONFIG_MEMCG */
-
-static int lru_gen_memcg_seg(struct lruvec *lruvec)
-{
- return 0;
-}
-
-#endif
+#endif /* CONFIG_MEMCG */
/******************************************************************************
* the eviction
@@ -4739,7 +4756,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
if (mem_cgroup_below_low(NULL, memcg)) {
/* see the comment on MEMCG_NR_GENS */
- if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL)
+ if (READ_ONCE(lruvec->lrugen.seg) != MEMCG_LRU_TAIL)
return MEMCG_LRU_TAIL;
memcg_memory_event(memcg, MEMCG_LOW);
@@ -4762,12 +4779,10 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
return 0;
/* one retry if offlined or too small */
- return lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL ?
+ return READ_ONCE(lruvec->lrugen.seg) != MEMCG_LRU_TAIL ?
MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
}
-#ifdef CONFIG_MEMCG
-
static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
{
int op;
@@ -4859,20 +4874,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
blk_finish_plug(&plug);
}
-#else /* !CONFIG_MEMCG */
-
-static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
-{
- BUILD_BUG();
-}
-
-static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
-{
- BUILD_BUG();
-}
-
-#endif
-
static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
{
int priority;
@@ -5220,6 +5221,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
int type, tier;
int hist = lru_hist_from_seq(seq);
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
for (tier = 0; tier < MAX_NR_TIERS; tier++) {
seq_printf(m, " %10d", tier);
@@ -5245,6 +5247,9 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
seq_putc(m, '\n');
}
+ if (!mm_state)
+ return;
+
seq_puts(m, " ");
for (i = 0; i < NR_MM_STATS; i++) {
const char *s = " ";
@@ -5252,10 +5257,10 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
if (seq == max_seq && NR_HIST_GENS == 1) {
s = "LOYNFA";
- n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
+ n = READ_ONCE(mm_state->stats[hist][i]);
} else if (seq != max_seq && NR_HIST_GENS > 1) {
s = "loynfa";
- n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
+ n = READ_ONCE(mm_state->stats[hist][i]);
}
seq_printf(m, " %10lu%c", n, s[i]);
@@ -5519,11 +5524,24 @@ static const struct file_operations lru_gen_ro_fops = {
* initialization
******************************************************************************/
+void lru_gen_init_pgdat(struct pglist_data *pgdat)
+{
+ int i, j;
+
+ spin_lock_init(&pgdat->memcg_lru.lock);
+
+ for (i = 0; i < MEMCG_NR_GENS; i++) {
+ for (j = 0; j < MEMCG_NR_BINS; j++)
+ INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
+ }
+}
+
void lru_gen_init_lruvec(struct lruvec *lruvec)
{
int i;
int gen, type, zone;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
lrugen->max_seq = MIN_NR_GENS + 1;
lrugen->enabled = lru_gen_enabled();
@@ -5534,47 +5552,46 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
for_each_gen_type_zone(gen, type, zone)
INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
- lruvec->mm_state.seq = MIN_NR_GENS;
+ if (mm_state)
+ mm_state->seq = MIN_NR_GENS;
}
#ifdef CONFIG_MEMCG
-void lru_gen_init_pgdat(struct pglist_data *pgdat)
+void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
- int i, j;
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
- spin_lock_init(&pgdat->memcg_lru.lock);
+ if (!mm_list)
+ return;
- for (i = 0; i < MEMCG_NR_GENS; i++) {
- for (j = 0; j < MEMCG_NR_BINS; j++)
- INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
- }
-}
-
-void lru_gen_init_memcg(struct mem_cgroup *memcg)
-{
- INIT_LIST_HEAD(&memcg->mm_list.fifo);
- spin_lock_init(&memcg->mm_list.lock);
+ INIT_LIST_HEAD(&mm_list->fifo);
+ spin_lock_init(&mm_list->lock);
}
void lru_gen_exit_memcg(struct mem_cgroup *memcg)
{
int i;
int nid;
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
- VM_WARN_ON_ONCE(!list_empty(&memcg->mm_list.fifo));
+ VM_WARN_ON_ONCE(mm_list && !list_empty(&mm_list->fifo));
for_each_node(nid) {
struct lruvec *lruvec = get_lruvec(memcg, nid);
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
sizeof(lruvec->lrugen.nr_pages)));
lruvec->lrugen.list.next = LIST_POISON1;
+ if (!mm_state)
+ continue;
+
for (i = 0; i < NR_BLOOM_FILTERS; i++) {
- bitmap_free(lruvec->mm_state.filters[i]);
- lruvec->mm_state.filters[i] = NULL;
+ bitmap_free(mm_state->filters[i]);
+ mm_state->filters[i] = NULL;
}
}
}
@@ -5600,14 +5617,17 @@ late_initcall(init_lru_gen);
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
{
+ BUILD_BUG();
}
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
+ BUILD_BUG();
}
static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
{
+ BUILD_BUG();
}
#endif /* CONFIG_LRU_GEN */
@@ -6400,7 +6420,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
* scan_control uses s8 fields for order, priority, and reclaim_idx.
* Confirm they are large enough for max values.
*/
- BUILD_BUG_ON(MAX_ORDER >= S8_MAX);
+ BUILD_BUG_ON(MAX_PAGE_ORDER >= S8_MAX);
BUILD_BUG_ON(DEF_PRIORITY > S8_MAX);
BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX);