Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 253 |
1 files changed, 110 insertions, 143 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4b27e245a055..e8ca4bdcb03c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -485,7 +485,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
 
 	if (lru_gen_enabled()) {
 		if (soft_limit_excess(memcg))
-			lru_gen_soft_reclaim(&memcg->nodeinfo[nid]->lruvec);
+			lru_gen_soft_reclaim(memcg, nid);
 		return;
 	}
 
@@ -639,7 +639,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
 	}
 }
 
-static void do_flush_stats(bool atomic)
+static void do_flush_stats(void)
 {
 	/*
 	 * We always flush the entire tree, so concurrent flushers can just
@@ -652,30 +652,16 @@ static void do_flush_stats(bool atomic)
 
 	WRITE_ONCE(flush_next_time, jiffies_64 + 2*FLUSH_TIME);
 
-	if (atomic)
-		cgroup_rstat_flush_atomic(root_mem_cgroup->css.cgroup);
-	else
-		cgroup_rstat_flush(root_mem_cgroup->css.cgroup);
+	cgroup_rstat_flush(root_mem_cgroup->css.cgroup);
 
 	atomic_set(&stats_flush_threshold, 0);
 	atomic_set(&stats_flush_ongoing, 0);
 }
 
-static bool should_flush_stats(void)
-{
-	return atomic_read(&stats_flush_threshold) > num_online_cpus();
-}
-
 void mem_cgroup_flush_stats(void)
 {
-	if (should_flush_stats())
-		do_flush_stats(false);
-}
-
-void mem_cgroup_flush_stats_atomic(void)
-{
-	if (should_flush_stats())
-		do_flush_stats(true);
+	if (atomic_read(&stats_flush_threshold) > num_online_cpus())
+		do_flush_stats();
 }
 
 void mem_cgroup_flush_stats_ratelimited(void)
@@ -690,7 +676,7 @@ static void flush_memcg_stats_dwork(struct work_struct *w)
 	 * Always flush here so that flushing in latency-sensitive paths is
 	 * as cheap as possible.
 	 */
-	do_flush_stats(false);
+	do_flush_stats();
 	queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
 }
 
@@ -1273,13 +1259,13 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
  *
  * This function iterates over tasks attached to @memcg or to any of its
  * descendants and calls @fn for each task. If @fn returns a non-zero
- * value, the function breaks the iteration loop and returns the value.
- * Otherwise, it will iterate over all tasks and return 0.
+ * value, the function breaks the iteration loop. Otherwise, it will iterate
+ * over all tasks and return 0.
  *
  * This function must not be called for the root memory cgroup.
  */
-int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
-			  int (*fn)(struct task_struct *, void *), void *arg)
+void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+			   int (*fn)(struct task_struct *, void *), void *arg)
 {
 	struct mem_cgroup *iter;
 	int ret = 0;
@@ -1299,7 +1285,6 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 			break;
 		}
 	}
-	return ret;
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -1580,13 +1565,10 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
 	return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
 }
 
-static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
+static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 {
-	struct seq_buf s;
 	int i;
 
-	seq_buf_init(&s, buf, bufsize);
-
 	/*
 	 * Provide statistics on the state of the memory subsystem as
 	 * well as cumulative event counters that show past behavior.
@@ -1603,21 +1585,21 @@ static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
 		u64 size;
 
 		size = memcg_page_state_output(memcg, memory_stats[i].idx);
-		seq_buf_printf(&s, "%s %llu\n", memory_stats[i].name, size);
+		seq_buf_printf(s, "%s %llu\n", memory_stats[i].name, size);
 
 		if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) {
 			size += memcg_page_state_output(memcg,
 							NR_SLAB_RECLAIMABLE_B);
-			seq_buf_printf(&s, "slab %llu\n", size);
+			seq_buf_printf(s, "slab %llu\n", size);
 		}
 	}
 
 	/* Accumulated memory events */
-	seq_buf_printf(&s, "pgscan %lu\n",
+	seq_buf_printf(s, "pgscan %lu\n",
 		       memcg_events(memcg, PGSCAN_KSWAPD) +
 		       memcg_events(memcg, PGSCAN_DIRECT) +
 		       memcg_events(memcg, PGSCAN_KHUGEPAGED));
-	seq_buf_printf(&s, "pgsteal %lu\n",
+	seq_buf_printf(s, "pgsteal %lu\n",
 		       memcg_events(memcg, PGSTEAL_KSWAPD) +
 		       memcg_events(memcg, PGSTEAL_DIRECT) +
 		       memcg_events(memcg, PGSTEAL_KHUGEPAGED));
@@ -1627,13 +1609,24 @@ static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
 		    memcg_vm_event_stat[i] == PGPGOUT)
			continue;
 
-		seq_buf_printf(&s, "%s %lu\n",
+		seq_buf_printf(s, "%s %lu\n",
 			       vm_event_name(memcg_vm_event_stat[i]),
 			       memcg_events(memcg, memcg_vm_event_stat[i]));
 	}
 
 	/* The above should easily fit into one page */
-	WARN_ON_ONCE(seq_buf_has_overflowed(&s));
+	WARN_ON_ONCE(seq_buf_has_overflowed(s));
+}
+
+static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s);
+
+static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
+{
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		memcg_stat_format(memcg, s);
+	else
+		memcg1_stat_format(memcg, s);
+	WARN_ON_ONCE(seq_buf_has_overflowed(s));
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
@@ -1671,6 +1664,7 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 {
 	/* Use static buffer, for the caller is holding oom_lock. */
 	static char buf[PAGE_SIZE];
+	struct seq_buf s;
 
 	lockdep_assert_held(&oom_lock);
 
@@ -1693,8 +1687,9 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 	pr_info("Memory cgroup stats for ");
 	pr_cont_cgroup_path(memcg->css.cgroup);
 	pr_cont(":");
-	memory_stat_format(memcg, buf, sizeof(buf));
-	pr_info("%s", buf);
+	seq_buf_init(&s, buf, sizeof(buf));
+	memory_stat_format(memcg, &s);
+	seq_buf_do_printk(&s, KERN_INFO);
 }
 
 /*
@@ -2028,26 +2023,12 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	if (locked)
 		mem_cgroup_oom_notify(memcg);
 
-	if (locked && !READ_ONCE(memcg->oom_kill_disable)) {
-		mem_cgroup_unmark_under_oom(memcg);
-		finish_wait(&memcg_oom_waitq, &owait.wait);
-		mem_cgroup_out_of_memory(memcg, current->memcg_oom_gfp_mask,
-					 current->memcg_oom_order);
-	} else {
-		schedule();
-		mem_cgroup_unmark_under_oom(memcg);
-		finish_wait(&memcg_oom_waitq, &owait.wait);
-	}
+	schedule();
+	mem_cgroup_unmark_under_oom(memcg);
+	finish_wait(&memcg_oom_waitq, &owait.wait);
 
-	if (locked) {
+	if (locked)
 		mem_cgroup_oom_unlock(memcg);
-		/*
-		 * There is no guarantee that an OOM-lock contender
-		 * sees the wakeups triggered by the OOM kill
-		 * uncharges. Wake any sleepers explicitly.
-		 */
-		memcg_oom_recover(memcg);
-	}
 cleanup:
 	current->memcg_in_oom = NULL;
 	css_put(&memcg->css);
@@ -2166,17 +2147,12 @@ again:
 	 * When charge migration first begins, we can have multiple
 	 * critical sections holding the fast-path RCU lock and one
 	 * holding the slowpath move_lock. Track the task who has the
-	 * move_lock for unlock_page_memcg().
+	 * move_lock for folio_memcg_unlock().
 	 */
 	memcg->move_lock_task = current;
 	memcg->move_lock_flags = flags;
 }
 
-void lock_page_memcg(struct page *page)
-{
-	folio_memcg_lock(page_folio(page));
-}
-
 static void __folio_memcg_unlock(struct mem_cgroup *memcg)
 {
 	if (memcg && memcg->move_lock_task == current) {
@@ -2204,11 +2180,6 @@ void folio_memcg_unlock(struct folio *folio)
 	__folio_memcg_unlock(folio_memcg(folio));
 }
 
-void unlock_page_memcg(struct page *page)
-{
-	folio_memcg_unlock(page_folio(page));
-}
-
 struct memcg_stock_pcp {
 	local_lock_t stock_lock;
 	struct mem_cgroup *cached; /* this never be root cgroup */
@@ -2275,7 +2246,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
+	if (memcg == READ_ONCE(stock->cached) && stock->nr_pages >= nr_pages) {
 		stock->nr_pages -= nr_pages;
 		ret = true;
 	}
@@ -2290,7 +2261,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
  */
 static void drain_stock(struct memcg_stock_pcp *stock)
 {
-	struct mem_cgroup *old = stock->cached;
+	struct mem_cgroup *old = READ_ONCE(stock->cached);
 
 	if (!old)
 		return;
@@ -2303,7 +2274,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 	}
 
 	css_put(&old->css);
-	stock->cached = NULL;
+	WRITE_ONCE(stock->cached, NULL);
 }
 
 static void drain_local_stock(struct work_struct *dummy)
@@ -2338,10 +2309,10 @@ static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	struct memcg_stock_pcp *stock;
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (stock->cached != memcg) { /* reset if necessary */
+	if (READ_ONCE(stock->cached) != memcg) { /* reset if necessary */
 		drain_stock(stock);
 		css_get(&memcg->css);
-		stock->cached = memcg;
+		WRITE_ONCE(stock->cached, memcg);
 	}
 	stock->nr_pages += nr_pages;
 
@@ -2383,7 +2354,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 		bool flush = false;
 
 		rcu_read_lock();
-		memcg = stock->cached;
+		memcg = READ_ONCE(stock->cached);
 		if (memcg && stock->nr_pages &&
 		    mem_cgroup_is_descendant(memcg, root_memcg))
 			flush = true;
@@ -2884,7 +2855,7 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
 	 *
 	 * - the page lock
 	 * - LRU isolation
-	 * - lock_page_memcg()
+	 * - folio_memcg_lock()
 	 * - exclusive reference
 	 * - mem_cgroup_trylock_pages()
 	 */
@@ -3208,12 +3179,12 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 	 * accumulating over a page of vmstat data or when pgdat or idx
	 * changes.
 	 */
-	if (stock->cached_objcg != objcg) {
+	if (READ_ONCE(stock->cached_objcg) != objcg) {
 		old = drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) ?
 				atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
-		stock->cached_objcg = objcg;
+		WRITE_ONCE(stock->cached_objcg, objcg);
 		stock->cached_pgdat = pgdat;
 	} else if (stock->cached_pgdat != pgdat) {
 		/* Flush the existing cached vmstat data */
@@ -3267,7 +3238,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
+	if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
 		stock->nr_bytes -= nr_bytes;
 		ret = true;
 	}
@@ -3279,7 +3250,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 
 static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 {
-	struct obj_cgroup *old = stock->cached_objcg;
+	struct obj_cgroup *old = READ_ONCE(stock->cached_objcg);
 
 	if (!old)
 		return NULL;
@@ -3332,7 +3303,7 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 		stock->cached_pgdat = NULL;
 	}
 
-	stock->cached_objcg = NULL;
+	WRITE_ONCE(stock->cached_objcg, NULL);
 	/*
 	 * The `old' objects needs to be released by the caller via
 	 * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock.
@@ -3343,10 +3314,11 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
 static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg)
 {
+	struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg);
 	struct mem_cgroup *memcg;
 
-	if (stock->cached_objcg) {
-		memcg = obj_cgroup_memcg(stock->cached_objcg);
+	if (objcg) {
+		memcg = obj_cgroup_memcg(objcg);
 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
 			return true;
 	}
@@ -3365,10 +3337,10 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 	local_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	if (stock->cached_objcg != objcg) { /* reset if necessary */
+	if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
 		old = drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
-		stock->cached_objcg = objcg;
+		WRITE_ONCE(stock->cached_objcg, objcg);
 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) ?
 				atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
 		allow_uncharge = true;	/* Allow uncharge when objcg changes */
@@ -3699,27 +3671,13 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
 
 	if (mem_cgroup_is_root(memcg)) {
 		/*
-		 * We can reach here from irq context through:
-		 * uncharge_batch()
-		 * |--memcg_check_events()
-		 *    |--mem_cgroup_threshold()
-		 *       |--__mem_cgroup_threshold()
-		 *          |--mem_cgroup_usage
-		 *
-		 * rstat flushing is an expensive operation that should not be
-		 * done from irq context; use stale stats in this case.
-		 * Arguably, usage threshold events are not reliable on the root
-		 * memcg anyway since its usage is ill-defined.
-		 *
-		 * Additionally, other call paths through memcg_check_events()
-		 * disable irqs, so make sure we are flushing stats atomically.
+		 * Approximate root's usage from global state. This isn't
+		 * perfect, but the root usage was always an approximation.
 		 */
-		if (in_task())
-			mem_cgroup_flush_stats_atomic();
-		val = memcg_page_state(memcg, NR_FILE_PAGES) +
-			memcg_page_state(memcg, NR_ANON_MAPPED);
+		val = global_node_page_state(NR_FILE_PAGES) +
+			global_node_page_state(NR_ANON_MAPPED);
 		if (swap)
-			val += memcg_page_state(memcg, MEMCG_SWAP);
+			val += total_swap_pages - get_nr_swap_pages();
 	} else {
 		if (!swap)
 			val = page_counter_read(&memcg->memory);
@@ -4135,9 +4093,8 @@ static const unsigned int memcg1_events[] = {
 	PGMAJFAULT,
 };
 
-static int memcg_stat_show(struct seq_file *m, void *v)
+static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	unsigned long memory, memsw;
 	struct mem_cgroup *mi;
 	unsigned int i;
@@ -4152,18 +4109,18 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
 			continue;
 		nr = memcg_page_state_local(memcg, memcg1_stats[i]);
-		seq_printf(m, "%s %lu\n", memcg1_stat_names[i],
+		seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i],
 			   nr * memcg_page_state_unit(memcg1_stats[i]));
 	}
 
 	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
-		seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]),
-			   memcg_events_local(memcg, memcg1_events[i]));
+		seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
+			       memcg_events_local(memcg, memcg1_events[i]));
 
 	for (i = 0; i < NR_LRU_LISTS; i++)
-		seq_printf(m, "%s %lu\n", lru_list_name(i),
-			   memcg_page_state_local(memcg, NR_LRU_BASE + i) *
-			   PAGE_SIZE);
+		seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
+			       memcg_page_state_local(memcg, NR_LRU_BASE + i) *
			       PAGE_SIZE);
 
 	/* Hierarchical information */
 	memory = memsw = PAGE_COUNTER_MAX;
@@ -4171,11 +4128,11 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 		memory = min(memory, READ_ONCE(mi->memory.max));
 		memsw = min(memsw, READ_ONCE(mi->memsw.max));
 	}
-	seq_printf(m, "hierarchical_memory_limit %llu\n",
-		   (u64)memory * PAGE_SIZE);
+	seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
+		       (u64)memory * PAGE_SIZE);
 	if (do_memsw_account())
-		seq_printf(m, "hierarchical_memsw_limit %llu\n",
-			   (u64)memsw * PAGE_SIZE);
+		seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
+			       (u64)memsw * PAGE_SIZE);
 
 	for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
 		unsigned long nr;
@@ -4183,19 +4140,19 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
 			continue;
 		nr = memcg_page_state(memcg, memcg1_stats[i]);
-		seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
+		seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
 			   (u64)nr * memcg_page_state_unit(memcg1_stats[i]));
 	}
 
 	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
-		seq_printf(m, "total_%s %llu\n",
-			   vm_event_name(memcg1_events[i]),
-			   (u64)memcg_events(memcg, memcg1_events[i]));
+		seq_buf_printf(s, "total_%s %llu\n",
+			       vm_event_name(memcg1_events[i]),
+			       (u64)memcg_events(memcg, memcg1_events[i]));
 
 	for (i = 0; i < NR_LRU_LISTS; i++)
-		seq_printf(m, "total_%s %llu\n", lru_list_name(i),
-			   (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
-			   PAGE_SIZE);
+		seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
+			       (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
+			       PAGE_SIZE);
 
 #ifdef CONFIG_DEBUG_VM
 	{
@@ -4210,12 +4167,10 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 			anon_cost += mz->lruvec.anon_cost;
 			file_cost += mz->lruvec.file_cost;
 		}
-		seq_printf(m, "anon_cost %lu\n", anon_cost);
-		seq_printf(m, "file_cost %lu\n", file_cost);
+		seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
+		seq_buf_printf(s, "file_cost %lu\n", file_cost);
 	}
 #endif
-
-	return 0;
 }
 
 static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css,
@@ -4648,11 +4603,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
 	struct mem_cgroup *parent;
 
-	/*
-	 * wb_writeback() takes a spinlock and calls
-	 * wb_over_bg_thresh()->mem_cgroup_wb_stats(). Do not sleep.
-	 */
-	mem_cgroup_flush_stats_atomic();
+	mem_cgroup_flush_stats();
 
 	*pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
 	*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
@@ -5059,6 +5010,8 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p)
 }
 #endif
 
+static int memory_stat_show(struct seq_file *m, void *v);
+
 static struct cftype mem_cgroup_legacy_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -5091,7 +5044,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
 	},
 	{
 		.name = "stat",
-		.seq_show = memcg_stat_show,
+		.seq_show = memory_stat_show,
 	},
 	{
 		.name = "force_empty",
@@ -5464,7 +5417,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 
 	if (unlikely(mem_cgroup_is_root(memcg)))
 		queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
-				   2UL*HZ);
+				   FLUSH_TIME);
 	lru_gen_online_memcg(memcg);
 	return 0;
 offline_kmem:
@@ -5865,7 +5818,7 @@ static int mem_cgroup_move_account(struct page *page,
 	 * with (un)charging, migration, LRU putback, or anything else
 	 * that would rely on a stable page's memory cgroup.
 	 *
-	 * Note that lock_page_memcg is a memcg lock, not a page lock,
+	 * Note that folio_memcg_lock is a memcg lock, not a page lock,
 	 * to save space. As soon as we switch page's memory cgroup to a
 	 * new memcg that isn't locked, the above state can change
 	 * concurrently again. Make sure we're truly done with it.
@@ -6057,11 +6010,11 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 		return 0;
 	}
 
-	if (pmd_trans_unstable(pmd))
-		return 0;
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	if (!pte)
+		return 0;
 	for (; addr != end; pte++, addr += PAGE_SIZE)
-		if (get_mctgt_type(vma, addr, *pte, NULL))
+		if (get_mctgt_type(vma, addr, ptep_get(pte), NULL))
 			mc.precharge++;	/* increment precharge temporarily */
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
@@ -6277,12 +6230,12 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 		return 0;
 	}
 
-	if (pmd_trans_unstable(pmd))
-		return 0;
 retry:
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	if (!pte)
+		return 0;
 	for (; addr != end; addr += PAGE_SIZE) {
-		pte_t ptent = *(pte++);
+		pte_t ptent = ptep_get(pte++);
 		bool device = false;
 		swp_entry_t ent;
 
@@ -6356,7 +6309,7 @@ static void mem_cgroup_move_charge(void)
 {
 	lru_add_drain_all();
 	/*
-	 * Signal lock_page_memcg() to take the memcg's move_lock
+	 * Signal folio_memcg_lock() to take the memcg's move_lock
 	 * while we're moving its pages to another memcg. Then wait
 	 * for already started RCU-only updates to finish.
 	 */
@@ -6634,10 +6587,12 @@ static int memory_stat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	struct seq_buf s;
 
 	if (!buf)
 		return -ENOMEM;
-	memory_stat_format(memcg, buf, PAGE_SIZE);
+	seq_buf_init(&s, buf, PAGE_SIZE);
+	memory_stat_format(memcg, &s);
 	seq_puts(m, buf);
 	kfree(buf);
 	return 0;
@@ -6896,7 +6851,7 @@ static unsigned long effective_protection(unsigned long usage,
 	protected = min(usage, setting);
 	/*
 	 * If all cgroups at this level combined claim and use more
-	 * protection then what the parent affords them, distribute
+	 * protection than what the parent affords them, distribute
 	 * shares in proportion to utilization.
 	 *
 	 * We are using actual utilization rather than the statically
@@ -7421,8 +7376,7 @@ static int __init mem_cgroup_init(void)
 	for_each_node(node) {
 		struct mem_cgroup_tree_per_node *rtpn;
 
-		rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL,
-				    node_online(node) ? node : NUMA_NO_NODE);
+		rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, node);
 
 		rtpn->rb_root = RB_ROOT;
 		rtpn->rb_rightmost = NULL;
@@ -7656,6 +7610,14 @@ static u64 swap_current_read(struct cgroup_subsys_state *css,
 	return (u64)page_counter_read(&memcg->swap) * PAGE_SIZE;
 }
 
+static u64 swap_peak_read(struct cgroup_subsys_state *css,
+			  struct cftype *cft)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+	return (u64)memcg->swap.watermark * PAGE_SIZE;
+}
+
 static int swap_high_show(struct seq_file *m, void *v)
 {
 	return seq_puts_memcg_tunable(m,
@@ -7735,6 +7697,11 @@ static struct cftype swap_files[] = {
 		.write = swap_max_write,
 	},
 	{
+		.name = "swap.peak",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = swap_peak_read,
+	},
+	{
 		.name = "swap.events",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.file_offset = offsetof(struct mem_cgroup, swap_events_file),
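Usage note, not part of the diff above: the final two hunks add a read-only swap.peak cftype backed by swap_peak_read(), which reports the swap page_counter watermark in bytes; under cgroup v2 this should appear as a memory.swap.peak file in each cgroup directory. The sketch below is only an illustration of reading it from userspace; the /sys/fs/cgroup mount point and the "example" group name are assumptions, not anything defined by the patch.

/*
 * Minimal sketch: read a cgroup's memory.swap.peak.
 * The cgroup path is an assumed example, adjust to your hierarchy.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/fs/cgroup/example/memory.swap.peak";
	unsigned long long peak_bytes;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%llu", &peak_bytes) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	/* The kernel side returns memcg->swap.watermark * PAGE_SIZE. */
	printf("swap peak: %llu bytes\n", peak_bytes);
	return 0;
}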