diff options
Diffstat (limited to 'kernel/sched/cputime.c')
-rw-r--r-- | kernel/sched/cputime.c | 64 |
1 files changed, 39 insertions, 25 deletions
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 72fdf06ef865..8394b1ee600c 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -288,24 +288,29 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) struct signal_struct *sig = tsk->signal; cputime_t utime, stime; struct task_struct *t; - - times->utime = sig->utime; - times->stime = sig->stime; - times->sum_exec_runtime = sig->sum_sched_runtime; + unsigned int seq, nextseq; + unsigned long flags; rcu_read_lock(); - /* make sure we can trust tsk->thread_group list */ - if (!likely(pid_alive(tsk))) - goto out; - - t = tsk; + /* Attempt a lockless read on the first round. */ + nextseq = 0; do { - task_cputime(t, &utime, &stime); - times->utime += utime; - times->stime += stime; - times->sum_exec_runtime += task_sched_runtime(t); - } while_each_thread(tsk, t); -out: + seq = nextseq; + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); + times->utime = sig->utime; + times->stime = sig->stime; + times->sum_exec_runtime = sig->sum_sched_runtime; + + for_each_thread(tsk, t) { + task_cputime(t, &utime, &stime); + times->utime += utime; + times->stime += stime; + times->sum_exec_runtime += task_sched_runtime(t); + } + /* If lockless access failed, take the lock. */ + nextseq = 1; + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); rcu_read_unlock(); } @@ -550,6 +555,23 @@ drop_precision: } /* + * Atomically advance counter to the new value. Interrupts, vcpu + * scheduling, and scaling inaccuracies can cause cputime_advance + * to be occasionally called with a new value smaller than counter. + * Let's enforce atomicity. + * + * Normally a caller will only go through this loop once, or not + * at all in case a previous caller updated counter the same jiffy. + */ +static void cputime_advance(cputime_t *counter, cputime_t new) +{ + cputime_t old; + + while (new > (old = ACCESS_ONCE(*counter))) + cmpxchg_cputime(counter, old, new); +} + +/* * Adjust tick based cputime random precision against scheduler * runtime accounting. */ @@ -594,13 +616,8 @@ static void cputime_adjust(struct task_cputime *curr, utime = rtime - stime; } - /* - * If the tick based count grows faster than the scheduler one, - * the result of the scaling may go backward. - * Let's enforce monotonicity. - */ - prev->stime = max(prev->stime, stime); - prev->utime = max(prev->utime, utime); + cputime_advance(&prev->stime, stime); + cputime_advance(&prev->utime, utime); out: *ut = prev->utime; @@ -617,9 +634,6 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) cputime_adjust(&cputime, &p->prev_cputime, ut, st); } -/* - * Must be called with siglock held. - */ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) { struct task_cputime cputime; |