diff options
author | Kinsey Ho <kinseyho@google.com> | 2023-12-27 14:12:02 +0000 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2024-01-05 10:17:44 -0800 |
commit | 61dd3f246b3adaabff3241c586f2210ac91b05a4 (patch) | |
tree | fc42e80cc23c1e05d514256dc1cc4c3cf4791817 /mm | |
parent | 71ce1ab54a505736786d9c5921e6c2718c7ec535 (diff) |
mm/mglru: add CONFIG_LRU_GEN_WALKS_MMU
Add CONFIG_LRU_GEN_WALKS_MMU such that if disabled, the code that
walks page tables to promote pages into the youngest generation will
not be built.
Also improves code readability by adding two helper functions
get_mm_state() and get_next_mm().
Link: https://lkml.kernel.org/r/20231227141205.2200125-3-kinseyho@google.com
Signed-off-by: Kinsey Ho <kinseyho@google.com>
Co-developed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Tested-by: Donet Tom <donettom@linux.vnet.ibm.com>
Acked-by: Yu Zhao <yuzhao@google.com>
Cc: kernel test robot <lkp@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 192 |
2 files changed, 127 insertions, 69 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index b072664b889a..79d563d8f9e0 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1274,6 +1274,10 @@ config LRU_GEN_STATS from evicted generations for debugging purpose. This option has a per-memcg and per-node memory overhead. + +config LRU_GEN_WALKS_MMU + def_bool y + depends on LRU_GEN && ARCH_HAS_HW_PTE_YOUNG # } config ARCH_SUPPORTS_PER_VMA_LOCK diff --git a/mm/vmscan.c b/mm/vmscan.c index b4ca3563bcf4..aa7ea09ffb4c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2671,13 +2671,14 @@ static void get_item_key(void *item, int *key) key[1] = hash >> BLOOM_FILTER_SHIFT; } -static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +static bool test_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq, + void *item) { int key[2]; unsigned long *filter; int gen = filter_gen_from_seq(seq); - filter = READ_ONCE(lruvec->mm_state.filters[gen]); + filter = READ_ONCE(mm_state->filters[gen]); if (!filter) return true; @@ -2686,13 +2687,14 @@ static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *it return test_bit(key[0], filter) && test_bit(key[1], filter); } -static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +static void update_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq, + void *item) { int key[2]; unsigned long *filter; int gen = filter_gen_from_seq(seq); - filter = READ_ONCE(lruvec->mm_state.filters[gen]); + filter = READ_ONCE(mm_state->filters[gen]); if (!filter) return; @@ -2704,12 +2706,12 @@ static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void * set_bit(key[1], filter); } -static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq) +static void reset_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq) { unsigned long *filter; int gen = filter_gen_from_seq(seq); - filter = lruvec->mm_state.filters[gen]; + filter = mm_state->filters[gen]; if (filter) { bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT)); return; @@ -2717,13 +2719,15 @@ static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq) filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); - WRITE_ONCE(lruvec->mm_state.filters[gen], filter); + WRITE_ONCE(mm_state->filters[gen], filter); } /****************************************************************************** * mm_struct list ******************************************************************************/ +#ifdef CONFIG_LRU_GEN_WALKS_MMU + static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) { static struct lru_gen_mm_list mm_list = { @@ -2740,6 +2744,29 @@ static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) return &mm_list; } +static struct lru_gen_mm_state *get_mm_state(struct lruvec *lruvec) +{ + return &lruvec->mm_state; +} + +static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk) +{ + int key; + struct mm_struct *mm; + struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); + struct lru_gen_mm_state *mm_state = get_mm_state(walk->lruvec); + + mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); + key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap); + + if (!walk->force_scan && !test_bit(key, &mm->lru_gen.bitmap)) + return NULL; + + clear_bit(key, &mm->lru_gen.bitmap); + + return mmget_not_zero(mm) ? mm : NULL; +} + void lru_gen_add_mm(struct mm_struct *mm) { int nid; @@ -2755,10 +2782,11 @@ void lru_gen_add_mm(struct mm_struct *mm) for_each_node_state(nid, N_MEMORY) { struct lruvec *lruvec = get_lruvec(memcg, nid); + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); /* the first addition since the last iteration */ - if (lruvec->mm_state.tail == &mm_list->fifo) - lruvec->mm_state.tail = &mm->lru_gen.list; + if (mm_state->tail == &mm_list->fifo) + mm_state->tail = &mm->lru_gen.list; } list_add_tail(&mm->lru_gen.list, &mm_list->fifo); @@ -2784,14 +2812,15 @@ void lru_gen_del_mm(struct mm_struct *mm) for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); /* where the current iteration continues after */ - if (lruvec->mm_state.head == &mm->lru_gen.list) - lruvec->mm_state.head = lruvec->mm_state.head->prev; + if (mm_state->head == &mm->lru_gen.list) + mm_state->head = mm_state->head->prev; /* where the last iteration ended before */ - if (lruvec->mm_state.tail == &mm->lru_gen.list) - lruvec->mm_state.tail = lruvec->mm_state.tail->next; + if (mm_state->tail == &mm->lru_gen.list) + mm_state->tail = mm_state->tail->next; } list_del_init(&mm->lru_gen.list); @@ -2834,10 +2863,30 @@ void lru_gen_migrate_mm(struct mm_struct *mm) } #endif +#else /* !CONFIG_LRU_GEN_WALKS_MMU */ + +static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) +{ + return NULL; +} + +static struct lru_gen_mm_state *get_mm_state(struct lruvec *lruvec) +{ + return NULL; +} + +static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk) +{ + return NULL; +} + +#endif + static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last) { int i; int hist; + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock); @@ -2845,44 +2894,20 @@ static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, hist = lru_hist_from_seq(walk->max_seq); for (i = 0; i < NR_MM_STATS; i++) { - WRITE_ONCE(lruvec->mm_state.stats[hist][i], - lruvec->mm_state.stats[hist][i] + walk->mm_stats[i]); + WRITE_ONCE(mm_state->stats[hist][i], + mm_state->stats[hist][i] + walk->mm_stats[i]); walk->mm_stats[i] = 0; } } if (NR_HIST_GENS > 1 && last) { - hist = lru_hist_from_seq(lruvec->mm_state.seq + 1); + hist = lru_hist_from_seq(mm_state->seq + 1); for (i = 0; i < NR_MM_STATS; i++) - WRITE_ONCE(lruvec->mm_state.stats[hist][i], 0); + WRITE_ONCE(mm_state->stats[hist][i], 0); } } -static bool should_skip_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk) -{ - int type; - unsigned long size = 0; - struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); - int key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap); - - if (!walk->force_scan && !test_bit(key, &mm->lru_gen.bitmap)) - return true; - - clear_bit(key, &mm->lru_gen.bitmap); - - for (type = !walk->can_swap; type < ANON_AND_FILE; type++) { - size += type ? get_mm_counter(mm, MM_FILEPAGES) : - get_mm_counter(mm, MM_ANONPAGES) + - get_mm_counter(mm, MM_SHMEMPAGES); - } - - if (size < MIN_LRU_BATCH) - return true; - - return !mmget_not_zero(mm); -} - static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, struct mm_struct **iter) { @@ -2891,7 +2916,7 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, struct mm_struct *mm = NULL; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct lru_gen_mm_list *mm_list = get_mm_list(memcg); - struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); /* * mm_state->seq is incremented after each iteration of mm_list. There @@ -2929,11 +2954,7 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, mm_state->tail = mm_state->head->next; walk->force_scan = true; } - - mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); - if (should_skip_mm(mm, walk)) - mm = NULL; - } while (!mm); + } while (!(mm = get_next_mm(walk))); done: if (*iter || last) reset_mm_stats(lruvec, walk, last); @@ -2941,7 +2962,7 @@ done: spin_unlock(&mm_list->lock); if (mm && first) - reset_bloom_filter(lruvec, walk->max_seq + 1); + reset_bloom_filter(mm_state, walk->max_seq + 1); if (*iter) mmput_async(*iter); @@ -2956,7 +2977,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) bool success = false; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct lru_gen_mm_list *mm_list = get_mm_list(memcg); - struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); spin_lock(&mm_list->lock); @@ -3469,6 +3490,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, DECLARE_BITMAP(bitmap, MIN_LRU_BATCH); unsigned long first = -1; struct lru_gen_mm_walk *walk = args->private; + struct lru_gen_mm_state *mm_state = get_mm_state(walk->lruvec); VM_WARN_ON_ONCE(pud_leaf(*pud)); @@ -3520,7 +3542,7 @@ restart: walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first); } - if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) + if (!walk->force_scan && !test_bloom_filter(mm_state, walk->max_seq, pmd + i)) continue; walk->mm_stats[MM_NONLEAF_FOUND]++; @@ -3531,7 +3553,7 @@ restart: walk->mm_stats[MM_NONLEAF_ADDED]++; /* carry over to the next generation */ - update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i); + update_bloom_filter(mm_state, walk->max_seq + 1, pmd + i); } walk_pmd_range_locked(pud, -1, vma, args, bitmap, &first); @@ -3738,16 +3760,25 @@ next: return success; } -static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan) +static bool inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, + bool can_swap, bool force_scan) { + bool success; int prev, next; int type, zone; struct lru_gen_folio *lrugen = &lruvec->lrugen; restart: + if (max_seq < READ_ONCE(lrugen->max_seq)) + return false; + spin_lock_irq(&lruvec->lru_lock); VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); + success = max_seq == lrugen->max_seq; + if (!success) + goto unlock; + for (type = ANON_AND_FILE - 1; type >= 0; type--) { if (get_nr_gens(lruvec, type) != MAX_NR_GENS) continue; @@ -3791,8 +3822,10 @@ restart: WRITE_ONCE(lrugen->timestamps[next], jiffies); /* make sure preceding modifications appear */ smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); - +unlock: spin_unlock_irq(&lruvec->lru_lock); + + return success; } static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, @@ -3802,14 +3835,16 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, struct lru_gen_mm_walk *walk; struct mm_struct *mm = NULL; struct lru_gen_folio *lrugen = &lruvec->lrugen; + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq)); + if (!mm_state) + return inc_max_seq(lruvec, max_seq, can_swap, force_scan); + /* see the comment in iterate_mm_list() */ - if (max_seq <= READ_ONCE(lruvec->mm_state.seq)) { - success = false; - goto done; - } + if (max_seq <= READ_ONCE(mm_state->seq)) + return false; /* * If the hardware doesn't automatically set the accessed bit, fallback @@ -3839,8 +3874,10 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, walk_mm(lruvec, mm, walk); } while (mm); done: - if (success) - inc_max_seq(lruvec, can_swap, force_scan); + if (success) { + success = inc_max_seq(lruvec, max_seq, can_swap, force_scan); + WARN_ON_ONCE(!success); + } return success; } @@ -3964,6 +4001,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) struct mem_cgroup *memcg = folio_memcg(folio); struct pglist_data *pgdat = folio_pgdat(folio); struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); DEFINE_MAX_SEQ(lruvec); int old_gen, new_gen = lru_gen_from_seq(max_seq); @@ -4042,8 +4080,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) mem_cgroup_unlock_pages(); /* feedback from rmap walkers to page table walkers */ - if (suitable_to_scan(i, young)) - update_bloom_filter(lruvec, max_seq, pvmw->pmd); + if (mm_state && suitable_to_scan(i, young)) + update_bloom_filter(mm_state, max_seq, pvmw->pmd); } /****************************************************************************** @@ -5219,6 +5257,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, int type, tier; int hist = lru_hist_from_seq(seq); struct lru_gen_folio *lrugen = &lruvec->lrugen; + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); for (tier = 0; tier < MAX_NR_TIERS; tier++) { seq_printf(m, " %10d", tier); @@ -5244,6 +5283,9 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, seq_putc(m, '\n'); } + if (!mm_state) + return; + seq_puts(m, " "); for (i = 0; i < NR_MM_STATS; i++) { const char *s = " "; @@ -5251,10 +5293,10 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, if (seq == max_seq && NR_HIST_GENS == 1) { s = "LOYNFA"; - n = READ_ONCE(lruvec->mm_state.stats[hist][i]); + n = READ_ONCE(mm_state->stats[hist][i]); } else if (seq != max_seq && NR_HIST_GENS > 1) { s = "loynfa"; - n = READ_ONCE(lruvec->mm_state.stats[hist][i]); + n = READ_ONCE(mm_state->stats[hist][i]); } seq_printf(m, " %10lu%c", n, s[i]); @@ -5523,6 +5565,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) int i; int gen, type, zone; struct lru_gen_folio *lrugen = &lruvec->lrugen; + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); lrugen->max_seq = MIN_NR_GENS + 1; lrugen->enabled = lru_gen_enabled(); @@ -5533,7 +5576,8 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) for_each_gen_type_zone(gen, type, zone) INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]); - lruvec->mm_state.seq = MIN_NR_GENS; + if (mm_state) + mm_state->seq = MIN_NR_GENS; } #ifdef CONFIG_MEMCG @@ -5552,28 +5596,38 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat) void lru_gen_init_memcg(struct mem_cgroup *memcg) { - INIT_LIST_HEAD(&memcg->mm_list.fifo); - spin_lock_init(&memcg->mm_list.lock); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + + if (!mm_list) + return; + + INIT_LIST_HEAD(&mm_list->fifo); + spin_lock_init(&mm_list->lock); } void lru_gen_exit_memcg(struct mem_cgroup *memcg) { int i; int nid; + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); - VM_WARN_ON_ONCE(!list_empty(&memcg->mm_list.fifo)); + VM_WARN_ON_ONCE(mm_list && !list_empty(&mm_list->fifo)); for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); + struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0, sizeof(lruvec->lrugen.nr_pages))); lruvec->lrugen.list.next = LIST_POISON1; + if (!mm_state) + continue; + for (i = 0; i < NR_BLOOM_FILTERS; i++) { - bitmap_free(lruvec->mm_state.filters[i]); - lruvec->mm_state.filters[i] = NULL; + bitmap_free(mm_state->filters[i]); + mm_state->filters[i] = NULL; } } } |