From a386b5af8edda1c742ce9f77891e112eefffc005 Mon Sep 17 00:00:00 2001 From: Kasper Pedersen Date: Wed, 20 Oct 2010 15:55:15 -0700 Subject: time: Compensate for rounding on odd-frequency clocksources When the clocksource is not a multiple of HZ, the clock will be off. For acpi_pm, HZ=1000 the error is 127.111 ppm: The rounding of cycle_interval ends up generating a false error term in ntp_error accumulation since xtime_interval is not exactly 1/HZ. So, we subtract out the error caused by the rounding. This has been visible since 2.6.32-rc2 commit a092ff0f90cae22b2ac8028ecd2c6f6c1a9e4601 time: Implement logarithmic time accumulation That commit raised NTP_INTERVAL_FREQ and exposed the rounding error. testing tool: http://n1.taur.dk/permanent/testpmt.c Also tested with ntpd and a frequency counter. Signed-off-by: Kasper Pedersen Acked-by: john stultz Cc: John Kacur Cc: Clark Williams Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 49010d822f7..5bb86da8200 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -32,6 +32,8 @@ struct timekeeper { cycle_t cycle_interval; /* Number of clock shifted nano seconds in one NTP interval. */ u64 xtime_interval; + /* shifted nano seconds left over when rounding cycle_interval */ + s64 xtime_remainder; /* Raw nano seconds accumulated per NTP interval. */ u32 raw_interval; @@ -62,7 +64,7 @@ struct timekeeper timekeeper; static void timekeeper_setup_internals(struct clocksource *clock) { cycle_t interval; - u64 tmp; + u64 tmp, ntpinterval; timekeeper.clock = clock; clock->cycle_last = clock->read(clock); @@ -70,6 +72,7 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; + ntpinterval = tmp; tmp += clock->mult/2; do_div(tmp, clock->mult); if (tmp == 0) @@ -80,6 +83,7 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Go back from cycles -> shifted ns */ timekeeper.xtime_interval = (u64) interval * clock->mult; + timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; timekeeper.raw_interval = ((u64) interval * clock->mult) >> clock->shift; @@ -719,7 +723,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) /* Accumulate error between NTP and clock interval */ timekeeper.ntp_error += tick_length << shift; - timekeeper.ntp_error -= timekeeper.xtime_interval << + timekeeper.ntp_error -= + (timekeeper.xtime_interval + timekeeper.xtime_remainder) << (timekeeper.ntp_error_shift + shift); return offset; -- cgit v1.2.3 From 2bf1c05e3c406925e498d06da66b4828f0209ea6 Mon Sep 17 00:00:00 2001 From: Nikitas Angelinas Date: Wed, 20 Oct 2010 15:57:31 -0700 Subject: time: Use ARRAY_SIZE macro in timecompare.c Replace sizeof(buffer)/sizeof(buffer[0]) with ARRAY_SIZE(buffer) in kernel/time/timecompare.c Signed-off-by: Nikitas Angelinas Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/time/timecompare.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c index ac38fbb176c..a9ae369925c 100644 --- a/kernel/time/timecompare.c +++ b/kernel/time/timecompare.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * fixed point arithmetic scale factor for skew @@ -57,11 +58,11 @@ int timecompare_offset(struct timecompare *sync, int index; int num_samples = sync->num_samples; - if (num_samples > sizeof(buffer)/sizeof(buffer[0])) { + if (num_samples > ARRAY_SIZE(buffer)) { samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC); if (!samples) { samples = buffer; - num_samples = sizeof(buffer)/sizeof(buffer[0]); + num_samples = ARRAY_SIZE(buffer); } } else { samples = buffer; -- cgit v1.2.3 From dd6414b50fa2b1cd247a8aa8f8bd42414b7453e1 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Wed, 20 Oct 2010 15:57:33 -0700 Subject: timer: Permit statically-declared work with deferrable timers Currently, you have to just define a delayed_work uninitialised, and then initialise it before first use. That's a tad clumsy. At risk of playing mind-games with the compiler, fooling it into doing pointer arithmetic with compile-time-constants, this lets clients properly initialise delayed work with deferrable timers statically. This patch was inspired by the issues which lead Artem Bityutskiy to commit 8eab945c5616fc984 ("sunrpc: make the cache cleaner workqueue deferrable"). Signed-off-by: Phil Carmody Acked-by: Artem Bityutskiy Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 25 +++++++++++++++++++++++++ include/linux/workqueue.h | 8 ++++++++ kernel/timer.c | 15 +-------------- 3 files changed, 34 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/include/linux/timer.h b/include/linux/timer.h index 1794674c1a5..cbfb7a355d3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -48,6 +48,18 @@ extern struct tvec_base boot_tvec_bases; #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif +/* + * Note that all tvec_bases are 2 byte aligned and lower bit of + * base in timer_list is guaranteed to be zero. Use the LSB to + * indicate whether the timer is deferrable. + * + * A deferrable timer will work normally when the system is busy, but + * will not cause a CPU to come out of idle just to service it; instead, + * the timer will be serviced when the CPU eventually wakes up with a + * subsequent non-deferrable timer. + */ +#define TBASE_DEFERRABLE_FLAG (0x1) + #define TIMER_INITIALIZER(_function, _expires, _data) { \ .entry = { .prev = TIMER_ENTRY_STATIC }, \ .function = (_function), \ @@ -59,6 +71,19 @@ extern struct tvec_base boot_tvec_bases; __FILE__ ":" __stringify(__LINE__)) \ } +#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \ + ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG)) + +#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\ + .entry = { .prev = TIMER_ENTRY_STATIC }, \ + .function = (_function), \ + .expires = (_expires), \ + .data = (_data), \ + .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \ + __TIMER_LOCKDEP_MAP_INITIALIZER( \ + __FILE__ ":" __stringify(__LINE__)) \ + } + #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ TIMER_INITIALIZER(_function, _expires, _data) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f9648..88238c15ec3 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -127,12 +127,20 @@ struct execute_work { .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } +#define __DEFERRED_WORK_INITIALIZER(n, f) { \ + .work = __WORK_INITIALIZER((n).work, (f)), \ + .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0), \ + } + #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) +#define DECLARE_DEFERRED_WORK(n, f) \ + struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f) + /* * initialize a work item's function pointer */ diff --git a/kernel/timer.c b/kernel/timer.c index 97bf05baade..72853b256ff 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; -/* - * Note that all tvec_bases are 2 byte aligned and lower bit of - * base in timer_list is guaranteed to be zero. Use the LSB to - * indicate whether the timer is deferrable. - * - * A deferrable timer will work normally when the system is busy, but - * will not cause a CPU to come out of idle just to service it; instead, - * the timer will be serviced when the CPU eventually wakes up with a - * subsequent non-deferrable timer. - */ -#define TBASE_DEFERRABLE_FLAG (0x1) - /* Functions below help us manage 'deferrable' flag */ static inline unsigned int tbase_get_deferrable(struct tvec_base *base) { @@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base) static inline void timer_set_deferrable(struct timer_list *timer) { - timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | - TBASE_DEFERRABLE_FLAG)); + timer->base = TBASE_MAKE_DEFERRED(timer->base); } static inline void -- cgit v1.2.3 From 20f33a03f0cf87e51165f7084f697acfb68e865b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 20 Oct 2010 15:57:34 -0700 Subject: posix-timers: Annotate lock_timer() lock_timer() conditionally grabs it_lock in case of returning non-NULL but unlock_timer() releases it unconditionally. This leads sparse to complain about the lock context imbalance. Rename and wrap lock_timer using __cond_lock() macro to make sparse happy. Signed-off-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/posix-timers.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 9ca4973f736..93bd2eb2bc5 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -145,7 +145,13 @@ static int common_timer_del(struct k_itimer *timer); static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); -static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); +static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); + +#define lock_timer(tid, flags) \ +({ struct k_itimer *__timr; \ + __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags)); \ + __timr; \ +}) static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) { @@ -619,7 +625,7 @@ out: * the find to the timer lock. To avoid a dead lock, the timer id MUST * be release with out holding the timer lock. */ -static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags) +static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; /* -- cgit v1.2.3 From 6f1bc451e6a79470b122a37ee1fc6bbca450f444 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Wed, 20 Oct 2010 15:57:31 -0700 Subject: timer: Make try_to_del_timer_sync() the same on SMP and UP On UP try_to_del_timer_sync() is mapped to del_timer() which does not take the running timer callback into account, so it has different semantics. Remove the SMP dependency of try_to_del_timer_sync() by using base->running_timer in the UP case as well. [ tglx: Removed set_running_timer() inline and tweaked the changelog ] Signed-off-by: Yong Zhang Cc: Ingo Molnar Cc: Peter Zijlstra Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 4 ++-- kernel/timer.c | 17 +++-------------- 2 files changed, 5 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/include/linux/timer.h b/include/linux/timer.h index cbfb7a355d3..6abd9138bed 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -274,11 +274,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) extern void add_timer(struct timer_list *timer); +extern int try_to_del_timer_sync(struct timer_list *timer); + #ifdef CONFIG_SMP - extern int try_to_del_timer_sync(struct timer_list *timer); extern int del_timer_sync(struct timer_list *timer); #else -# define try_to_del_timer_sync(t) del_timer(t) # define del_timer_sync(t) del_timer(t) #endif diff --git a/kernel/timer.c b/kernel/timer.c index 72853b256ff..47b86c1e322 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -330,15 +330,6 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); - -static inline void set_running_timer(struct tvec_base *base, - struct timer_list *timer) -{ -#ifdef CONFIG_SMP - base->running_timer = timer; -#endif -} - static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; @@ -923,15 +914,12 @@ int del_timer(struct timer_list *timer) } EXPORT_SYMBOL(del_timer); -#ifdef CONFIG_SMP /** * try_to_del_timer_sync - Try to deactivate a timer * @timer: timer do del * * This function tries to deactivate a timer. Upon successful (ret >= 0) * exit the timer is not queued and the handler is not running on any CPU. - * - * It must not be called from interrupt contexts. */ int try_to_del_timer_sync(struct timer_list *timer) { @@ -960,6 +948,7 @@ out: } EXPORT_SYMBOL(try_to_del_timer_sync); +#ifdef CONFIG_SMP /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated @@ -1098,7 +1087,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); - set_running_timer(base, timer); + base->running_timer = timer; detach_timer(timer, 1); spin_unlock_irq(&base->lock); @@ -1106,7 +1095,7 @@ static inline void __run_timers(struct tvec_base *base) spin_lock_irq(&base->lock); } } - set_running_timer(base, NULL); + base->running_timer = NULL; spin_unlock_irq(&base->lock); } -- cgit v1.2.3 From 1118e2cd33d47254854e1ba3ba8e32802ff14fdf Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Wed, 20 Oct 2010 15:57:32 -0700 Subject: timer: Del_timer_sync() can be used in softirq context Actually we have used del_timer_sync() in softirq context for a long time, e.g. in __dst_free()::cancel_delayed_work(). So change the comments of it to warn on hardirq context only, and make lockdep know about this change. Signed-off-by: Yong Zhang Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/timer.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 47b86c1e322..612de0306e7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -959,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync); * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from - * interrupt contexts. The caller must not hold locks which would prevent + * hardirq contexts. The caller must not hold locks which would prevent * completion of the timer's handler. The timer's handler must not call * add_timer_on(). Upon exit the timer is not queued and the handler is * not running on any CPU. @@ -969,12 +969,10 @@ EXPORT_SYMBOL(try_to_del_timer_sync); int del_timer_sync(struct timer_list *timer) { #ifdef CONFIG_LOCKDEP - unsigned long flags; - - local_irq_save(flags); + local_bh_disable(); lock_map_acquire(&timer->lockdep_map); lock_map_release(&timer->lockdep_map); - local_irq_restore(flags); + local_bh_enable(); #endif for (;;) { -- cgit v1.2.3 From 466bd3030973910118ca601da8072be97a1e2209 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Wed, 20 Oct 2010 15:57:33 -0700 Subject: timer: Warn when del_timer_sync() is called in hardirq context Add explict warning when del_timer_sync() is called in hardirq context. Signed-off-by: Yong Zhang Cc: Ingo Molnar Cc: Peter Zijlstra Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/timer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 612de0306e7..483e54ba5c9 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -974,7 +974,11 @@ int del_timer_sync(struct timer_list *timer) lock_map_release(&timer->lockdep_map); local_bh_enable(); #endif - + /* + * don't use it in hardirq context, because it + * could lead to deadlock. + */ + WARN_ON(in_irq()); for (;;) { int ret = try_to_del_timer_sync(timer); if (ret >= 0) -- cgit v1.2.3 From 998adc3dda59f811966b3ccb21eb223680b25ec4 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 20 Sep 2010 19:19:17 -0700 Subject: hrtimers: Convert hrtimers to use timerlist infrastructure Converts the hrtimer code to use the new timerlist infrastructure Signed-off-by: John Stultz LKML Reference: <1290136329-18291-3-git-send-email-john.stultz@linaro.org> Reviewed-by: Thomas Gleixner CC: Alessandro Zummo CC: Thomas Gleixner CC: Richard Cochran --- include/linux/hrtimer.h | 32 +++++++++--------- kernel/hrtimer.c | 86 +++++++++++++++++------------------------------- kernel/time/timer_list.c | 8 ++--- 3 files changed, 49 insertions(+), 77 deletions(-) (limited to 'kernel') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index dd9954b7934..330586ffffb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -22,7 +22,7 @@ #include #include #include - +#include struct hrtimer_clock_base; struct hrtimer_cpu_base; @@ -79,8 +79,8 @@ enum hrtimer_restart { /** * struct hrtimer - the basic hrtimer structure - * @node: red black tree node for time ordered insertion - * @_expires: the absolute expiry time in the hrtimers internal + * @node: timerqueue node, which also manages node.expires, + * the absolute expiry time in the hrtimers internal * representation. The time is related to the clock on * which the timer is based. Is setup by adding * slack to the _softexpires value. For non range timers @@ -101,8 +101,7 @@ enum hrtimer_restart { * The hrtimer structure must be initialized by hrtimer_init() */ struct hrtimer { - struct rb_node node; - ktime_t _expires; + struct timerqueue_node node; ktime_t _softexpires; enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; @@ -141,8 +140,7 @@ struct hrtimer_sleeper { struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; clockid_t index; - struct rb_root active; - struct rb_node *first; + struct timerqueue_head active; ktime_t resolution; ktime_t (*get_time)(void); ktime_t softirq_time; @@ -183,43 +181,43 @@ struct hrtimer_cpu_base { static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { - timer->_expires = time; + timer->node.expires = time; timer->_softexpires = time; } static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) { timer->_softexpires = time; - timer->_expires = ktime_add_safe(time, delta); + timer->node.expires = ktime_add_safe(time, delta); } static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta) { timer->_softexpires = time; - timer->_expires = ktime_add_safe(time, ns_to_ktime(delta)); + timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta)); } static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) { - timer->_expires.tv64 = tv64; + timer->node.expires.tv64 = tv64; timer->_softexpires.tv64 = tv64; } static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) { - timer->_expires = ktime_add_safe(timer->_expires, time); + timer->node.expires = ktime_add_safe(timer->node.expires, time); timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { - timer->_expires = ktime_add_ns(timer->_expires, ns); + timer->node.expires = ktime_add_ns(timer->node.expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); } static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) { - return timer->_expires; + return timer->node.expires; } static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) @@ -229,7 +227,7 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) { - return timer->_expires.tv64; + return timer->node.expires.tv64; } static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) { @@ -238,12 +236,12 @@ static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { - return ktime_to_ns(timer->_expires); + return ktime_to_ns(timer->node.expires); } static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { - return ktime_sub(timer->_expires, timer->base->get_time()); + return ktime_sub(timer->node.expires, timer->base->get_time()); } #ifdef CONFIG_HIGH_RES_TIMERS diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ce669174f35..93976ad42f5 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -516,10 +516,13 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { struct hrtimer *timer; + struct timerqueue_node *next; - if (!base->first) + next = timerqueue_getnext(&base->active); + if (!next) continue; - timer = rb_entry(base->first, struct hrtimer, node); + timer = container_of(next, struct hrtimer, node); + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); /* * clock_was_set() has changed base->offset so the @@ -840,48 +843,17 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); static int enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) { - struct rb_node **link = &base->active.rb_node; - struct rb_node *parent = NULL; - struct hrtimer *entry; - int leftmost = 1; - debug_activate(timer); - /* - * Find the right place in the rbtree: - */ - while (*link) { - parent = *link; - entry = rb_entry(parent, struct hrtimer, node); - /* - * We dont care about collisions. Nodes with - * the same expiry time stay together. - */ - if (hrtimer_get_expires_tv64(timer) < - hrtimer_get_expires_tv64(entry)) { - link = &(*link)->rb_left; - } else { - link = &(*link)->rb_right; - leftmost = 0; - } - } + timerqueue_add(&base->active, &timer->node); - /* - * Insert the timer to the rbtree and check whether it - * replaces the first pending timer - */ - if (leftmost) - base->first = &timer->node; - - rb_link_node(&timer->node, parent, link); - rb_insert_color(&timer->node, &base->active); /* * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the * state of a possibly running callback. */ timer->state |= HRTIMER_STATE_ENQUEUED; - return leftmost; + return (&timer->node == base->active.next); } /* @@ -901,12 +873,7 @@ static void __remove_hrtimer(struct hrtimer *timer, if (!(timer->state & HRTIMER_STATE_ENQUEUED)) goto out; - /* - * Remove the timer from the rbtree and replace the first - * entry pointer if necessary. - */ - if (base->first == &timer->node) { - base->first = rb_next(&timer->node); + if (&timer->node == timerqueue_getnext(&base->active)) { #ifdef CONFIG_HIGH_RES_TIMERS /* Reprogram the clock event device. if enabled */ if (reprogram && hrtimer_hres_active()) { @@ -919,7 +886,7 @@ static void __remove_hrtimer(struct hrtimer *timer, } #endif } - rb_erase(&timer->node, &base->active); + timerqueue_del(&base->active, &timer->node); out: timer->state = newstate; } @@ -1123,11 +1090,13 @@ ktime_t hrtimer_get_next_event(void) if (!hrtimer_hres_active()) { for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { struct hrtimer *timer; + struct timerqueue_node *next; - if (!base->first) + next = timerqueue_getnext(&base->active); + if (!next) continue; - timer = rb_entry(base->first, struct hrtimer, node); + timer = container_of(next, struct hrtimer, node); delta.tv64 = hrtimer_get_expires_tv64(timer); delta = ktime_sub(delta, base->get_time()); if (delta.tv64 < mindelta.tv64) @@ -1157,6 +1126,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, timer->base = &cpu_base->clock_base[clock_id]; hrtimer_init_timer_hres(timer); + timerqueue_init(&timer->node); #ifdef CONFIG_TIMER_STATS timer->start_site = NULL; @@ -1270,14 +1240,14 @@ retry: for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { ktime_t basenow; - struct rb_node *node; + struct timerqueue_node *node; basenow = ktime_add(now, base->offset); - while ((node = base->first)) { + while ((node = timerqueue_getnext(&base->active))) { struct hrtimer *timer; - timer = rb_entry(node, struct hrtimer, node); + timer = container_of(node, struct hrtimer, node); /* * The immediate goal for using the softexpires is @@ -1433,7 +1403,7 @@ void hrtimer_run_pending(void) */ void hrtimer_run_queues(void) { - struct rb_node *node; + struct timerqueue_node *node; struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; int index, gettime = 1; @@ -1442,9 +1412,11 @@ void hrtimer_run_queues(void) return; for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { - base = &cpu_base->clock_base[index]; + struct timerqueue_node *next; - if (!base->first) + base = &cpu_base->clock_base[index]; + next = timerqueue_getnext(&base->active); + if (!next) continue; if (gettime) { @@ -1454,10 +1426,10 @@ void hrtimer_run_queues(void) raw_spin_lock(&cpu_base->lock); - while ((node = base->first)) { + while ((node = next)) { struct hrtimer *timer; - timer = rb_entry(node, struct hrtimer, node); + timer = container_of(node, struct hrtimer, node); if (base->softirq_time.tv64 <= hrtimer_get_expires_tv64(timer)) break; @@ -1622,8 +1594,10 @@ static void __cpuinit init_hrtimers_cpu(int cpu) raw_spin_lock_init(&cpu_base->lock); - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { cpu_base->clock_base[i].cpu_base = cpu_base; + timerqueue_init_head(&cpu_base->clock_base[i].active); + } hrtimer_init_hres(cpu_base); } @@ -1634,10 +1608,10 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, struct hrtimer_clock_base *new_base) { struct hrtimer *timer; - struct rb_node *node; + struct timerqueue_node *node; - while ((node = rb_first(&old_base->active))) { - timer = rb_entry(node, struct hrtimer, node); + while ((node = timerqueue_getnext(&old_base->active))) { + timer = container_of(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); debug_deactivate(timer); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index ab8f5e33fa9..32a19f9397f 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -79,26 +79,26 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, { struct hrtimer *timer, tmp; unsigned long next = 0, i; - struct rb_node *curr; + struct timerqueue_node *curr; unsigned long flags; next_one: i = 0; raw_spin_lock_irqsave(&base->cpu_base->lock, flags); - curr = base->first; + curr = timerqueue_getnext(&base->active); /* * Crude but we have to do this O(N*N) thing, because * we have to unlock the base when printing: */ while (curr && i < next) { - curr = rb_next(curr); + curr = timerqueue_iterate_next(curr); i++; } if (curr) { - timer = rb_entry(curr, struct hrtimer, node); + timer = container_of(curr, struct hrtimer, node); tmp = *timer; raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags); -- cgit v1.2.3 From b007c389d3e09b823eccda1503390fa2a9adca0d Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 10 Dec 2010 22:19:53 -0800 Subject: hrtimer: fix timerqueue conversion flub In converting the hrtimers to timerqueue, I missed a spot in hrtimer_run_queues where we loop running timers. We end up not pulling the new next value out and instead just use the last next value, causing boot time hangs in some cases. The proper fix is to pull timerqueue_getnext each iteration instead of using a local next value. Reported-by: Ingo Molnar Signed-off-by: John Stultz --- kernel/hrtimer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 93976ad42f5..7a7a2061c24 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1412,11 +1412,8 @@ void hrtimer_run_queues(void) return; for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { - struct timerqueue_node *next; - base = &cpu_base->clock_base[index]; - next = timerqueue_getnext(&base->active); - if (!next) + if (!timerqueue_getnext(&base->active)) continue; if (gettime) { @@ -1426,7 +1423,7 @@ void hrtimer_run_queues(void) raw_spin_lock(&cpu_base->lock); - while ((node = next)) { + while ((node = timerqueue_getnext(&base->active))) { struct hrtimer *timer; timer = container_of(node, struct hrtimer, node); -- cgit v1.2.3 From 7496351ad87e61e96b49dd7b43c6534e3401f566 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 30 Nov 2010 14:05:53 -0600 Subject: timers: Use this_cpu_read Eric asked for this. [tglx: Because it generates faster code according to Erics ] Signed-off-by: Christoph Lameter Cc: Pekka Enberg Cc: Eric Dumazet Cc: Mathieu Desnoyers Cc: Tejun Heo Cc: linux-mm@kvack.org LKML-Reference: Signed-off-by: Thomas Gleixner --- kernel/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 483e54ba5c9..beb97fd11ac 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1227,7 +1227,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, */ unsigned long get_next_timer_interrupt(unsigned long now) { - struct tvec_base *base = __get_cpu_var(tvec_bases); + struct tvec_base *base = __this_cpu_read(tvec_bases); unsigned long expires; spin_lock(&base->lock); @@ -1267,7 +1267,7 @@ void update_process_times(int user_tick) */ static void run_timer_softirq(struct softirq_action *h) { - struct tvec_base *base = __get_cpu_var(tvec_bases); + struct tvec_base *base = __this_cpu_read(tvec_bases); hrtimer_run_pending(); -- cgit v1.2.3