summaryrefslogtreecommitdiff
path: root/kernel/sched/stats.h
diff options
context:
space:
mode:
authorJohn Stultz <jstultz@google.com>2024-06-18 14:58:55 -0700
committerPeter Zijlstra <peterz@infradead.org>2024-07-01 13:01:44 +0200
commitddae0ca2a8fe12d0e24ab10ba759c3fbd755ada8 (patch)
treeb71cecaa7bd1a81b073e117af8d8f4f5be66ba8b /kernel/sched/stats.h
parentb58652db66c910c2245f5bee7deca41c12d707b9 (diff)
sched: Move psi_account_irqtime() out of update_rq_clock_task() hotpath
It was reported that in moving to 6.1, a larger then 10% regression was seen in the performance of clock_gettime(CLOCK_THREAD_CPUTIME_ID,...). Using a simple reproducer, I found: 5.10: 100000000 calls in 24345994193 ns => 243.460 ns per call 100000000 calls in 24288172050 ns => 242.882 ns per call 100000000 calls in 24289135225 ns => 242.891 ns per call 6.1: 100000000 calls in 28248646742 ns => 282.486 ns per call 100000000 calls in 28227055067 ns => 282.271 ns per call 100000000 calls in 28177471287 ns => 281.775 ns per call The cause of this was finally narrowed down to the addition of psi_account_irqtime() in update_rq_clock_task(), in commit 52b1364ba0b1 ("sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure"). In my initial attempt to resolve this, I leaned towards moving all accounting work out of the clock_gettime() call path, but it wasn't very pretty, so it will have to wait for a later deeper rework. Instead, Peter shared this approach: Rework psi_account_irqtime() to use its own psi_irq_time base for accounting, and move it out of the hotpath, calling it instead from sched_tick() and __schedule(). In testing this, we found the importance of ensuring psi_account_irqtime() is run under the rq_lock, which Johannes Weiner helpfully explained, so also add some lockdep annotations to make that requirement clear. With this change the performance is back in-line with 5.10: 6.1+fix: 100000000 calls in 24297324597 ns => 242.973 ns per call 100000000 calls in 24318869234 ns => 243.189 ns per call 100000000 calls in 24291564588 ns => 242.916 ns per call Reported-by: Jimmy Shiu <jimmyshiu@google.com> Originally-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: John Stultz <jstultz@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev> Reviewed-by: Qais Yousef <qyousef@layalina.io> Link: https://lore.kernel.org/r/20240618215909.4099720-1-jstultz@google.com
Diffstat (limited to 'kernel/sched/stats.h')
-rw-r--r--kernel/sched/stats.h11
1 files changed, 8 insertions, 3 deletions
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 38f3698f5e5b..b02dfc322951 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -110,8 +110,12 @@ __schedstats_from_se(struct sched_entity *se)
void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
bool sleep);
-void psi_account_irqtime(struct task_struct *task, u32 delta);
-
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev);
+#else
+static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
+ struct task_struct *prev) {}
+#endif /*CONFIG_IRQ_TIME_ACCOUNTING */
/*
* PSI tracks state that persists across sleeps, such as iowaits and
* memory stalls. As a result, it has to distinguish between sleeps,
@@ -192,7 +196,8 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {}
static inline void psi_sched_switch(struct task_struct *prev,
struct task_struct *next,
bool sleep) {}
-static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {}
+static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
+ struct task_struct *prev) {}
#endif /* CONFIG_PSI */
#ifdef CONFIG_SCHED_INFO