diff options
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 96 |
1 files changed, 72 insertions, 24 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 1ecc648b6191..e411385b304a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -200,6 +200,29 @@ static bool sane_reclaim(struct scan_control *sc) #endif return false; } + +static void set_memcg_congestion(pg_data_t *pgdat, + struct mem_cgroup *memcg, + bool congested) +{ + struct mem_cgroup_per_node *mn; + + if (!memcg) + return; + + mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id); + WRITE_ONCE(mn->congested, congested); +} + +static bool memcg_congested(pg_data_t *pgdat, + struct mem_cgroup *memcg) +{ + struct mem_cgroup_per_node *mn; + + mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id); + return READ_ONCE(mn->congested); + +} #else static bool global_reclaim(struct scan_control *sc) { @@ -210,6 +233,18 @@ static bool sane_reclaim(struct scan_control *sc) { return true; } + +static inline void set_memcg_congestion(struct pglist_data *pgdat, + struct mem_cgroup *memcg, bool congested) +{ +} + +static inline bool memcg_congested(struct pglist_data *pgdat, + struct mem_cgroup *memcg) +{ + return false; + +} #endif /* @@ -2474,6 +2509,12 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, return true; } +static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg) +{ + return test_bit(PGDAT_CONGESTED, &pgdat->flags) || + (memcg && memcg_congested(pgdat, memcg)); +} + static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) { struct reclaim_state *reclaim_state = current->reclaim_state; @@ -2556,29 +2597,27 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) if (sc->nr_reclaimed - nr_reclaimed) reclaimable = true; - /* - * If reclaim is isolating dirty pages under writeback, it - * implies that the long-lived page allocation rate is exceeding - * the page laundering rate. Either the global limits are not - * being effective at throttling processes due to the page - * distribution throughout zones or there is heavy usage of a - * slow backing device. The only option is to throttle from - * reclaim context which is not ideal as there is no guarantee - * the dirtying process is throttled in the same way - * balance_dirty_pages() manages. - * - * Once a node is flagged PGDAT_WRITEBACK, kswapd will count the - * number of pages under pages flagged for immediate reclaim and - * stall if any are encountered in the nr_immediate check below. - */ - if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) - set_bit(PGDAT_WRITEBACK, &pgdat->flags); + if (current_is_kswapd()) { + /* + * If reclaim is isolating dirty pages under writeback, + * it implies that the long-lived page allocation rate + * is exceeding the page laundering rate. Either the + * global limits are not being effective at throttling + * processes due to the page distribution throughout + * zones or there is heavy usage of a slow backing + * device. The only option is to throttle from reclaim + * context which is not ideal as there is no guarantee + * the dirtying process is throttled in the same way + * balance_dirty_pages() manages. + * + * Once a node is flagged PGDAT_WRITEBACK, kswapd will + * count the number of pages under pages flagged for + * immediate reclaim and stall if any are encountered + * in the nr_immediate check below. + */ + if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) + set_bit(PGDAT_WRITEBACK, &pgdat->flags); - /* - * Legacy memcg will stall in page writeback so avoid forcibly - * stalling here. - */ - if (sane_reclaim(sc)) { /* * Tag a node as congested if all the dirty pages * scanned were backed by a congested BDI and @@ -2602,14 +2641,22 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) } /* + * Legacy memcg will stall in page writeback so avoid forcibly + * stalling in wait_iff_congested(). + */ + if (!global_reclaim(sc) && sane_reclaim(sc) && + sc->nr.dirty && sc->nr.dirty == sc->nr.congested) + set_memcg_congestion(pgdat, root, true); + + /* * Stall direct reclaim for IO completions if underlying BDIs * and node is congested. Allow kswapd to continue until it * starts encountering unqueued dirty pages or cycling through * the LRU too quickly. */ if (!sc->hibernation_mode && !current_is_kswapd() && - current_may_throttle()) - wait_iff_congested(pgdat, BLK_RW_ASYNC, HZ/10); + current_may_throttle() && pgdat_memcg_congested(pgdat, root)) + wait_iff_congested(BLK_RW_ASYNC, HZ/10); } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); @@ -2826,6 +2873,7 @@ retry: continue; last_pgdat = zone->zone_pgdat; snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); + set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false); } delayacct_freepages_end(); |