diff options
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r-- | block/blk-cgroup.c | 41 |
1 files changed, 32 insertions, 9 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c8b28ec5dde9..fc49be622e05 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -34,6 +34,8 @@ #include "blk-ioprio.h" #include "blk-throttle.h" +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu); + /* * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. * blkcg_pol_register_mutex nests outside of it and synchronizes entire @@ -56,6 +58,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ bool blkcg_debug_stats = false; +static DEFINE_RAW_SPINLOCK(blkg_stat_lock); + #define BLKG_DESTROY_BATCH_SIZE 64 /* @@ -163,10 +167,20 @@ static void blkg_free(struct blkcg_gq *blkg) static void __blkg_release(struct rcu_head *rcu) { struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); + struct blkcg *blkcg = blkg->blkcg; + int cpu; #ifdef CONFIG_BLK_CGROUP_PUNT_BIO WARN_ON(!bio_list_empty(&blkg->async_bios)); #endif + /* + * Flush all the non-empty percpu lockless lists before releasing + * us, given these stat belongs to us. + * + * blkg_stat_lock is for serializing blkg stat update + */ + for_each_possible_cpu(cpu) + __blkcg_rstat_flush(blkcg, cpu); /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); @@ -965,16 +979,12 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } -static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) { - struct blkcg *blkcg = css_to_blkcg(css); struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu); struct llist_node *lnode; struct blkg_iostat_set *bisc, *next_bisc; - - /* Root-level stats are sourced from system-wide IO stats */ - if (!cgroup_parent(css->cgroup)) - return; + unsigned long flags; rcu_read_lock(); @@ -983,6 +993,14 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) goto out; /* + * For covering concurrent parent blkg update from blkg_release(). + * + * When flushing from cgroup, cgroup_rstat_lock is always held, so + * this lock won't cause contention most of time. + */ + raw_spin_lock_irqsave(&blkg_stat_lock, flags); + + /* * Iterate only the iostat_cpu's queued in the lockless list. */ llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) { @@ -1005,13 +1023,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) if (parent && parent->parent) blkcg_iostat_update(parent, &blkg->iostat.cur, &blkg->iostat.last); - percpu_ref_put(&blkg->refcnt); } - + raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); out: rcu_read_unlock(); } +static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) +{ + /* Root-level stats are sourced from system-wide IO stats */ + if (cgroup_parent(css->cgroup)) + __blkcg_rstat_flush(css_to_blkcg(css), cpu); +} + /* * We source root cgroup stats from the system-wide stats to avoid * tracking the same information twice and incurring overhead when no @@ -2092,7 +2116,6 @@ void blk_cgroup_bio_start(struct bio *bio) llist_add(&bis->lnode, lhead); WRITE_ONCE(bis->lqueued, true); - percpu_ref_get(&bis->blkg->refcnt); } u64_stats_update_end_irqrestore(&bis->sync, flags); |