author     Linus Torvalds <torvalds@linux-foundation.org>   2022-05-23 17:05:55 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2022-05-23 17:05:55 -0700
commit     6e01f86fb2025111c77101254f1442ac137089cd
tree       da88da855fe6d2ebb46db7835f599562c50d8387
parent     fcfde8a7cf6d5e347ce61d8e5c0aee52926ef8e9
parent     317f29c14d0cca09952f1022491454b23455ebcb
Merge tag 'timers-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer and timekeeping updates from Thomas Gleixner:
- Expose CLOCK_TAI to instrumentation to aid with TSN debugging (a usage
  sketch follows this list).
- Ensure that the clockevent is stopped when there is no timer armed to
avoid pointless wakeups.
- Make the sched clock frequency handling and rounding consistent.
- Provide a better debugobject hint for delayed works. The timer
callback is always the same, which makes it difficult to identify the
underlying work. Use the work function as a hint instead.
- Move the timer specific sysctl code into the timer subsystem.
- The usual set of improvements and cleanups
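
As a usage illustration of the CLOCK_TAI item above: the merge adds and
exports ktime_get_tai_fast_ns() (GPL-only) so instrumentation can take TAI
timestamps from any context, including NMI. The module below is a minimal,
hypothetical sketch and is not part of the merge; the module name, message
and structure are invented for illustration, and only the
ktime_get_tai_fast_ns() call itself comes from the patches further down.

/*
 * Hypothetical illustration only (not part of this merge): a trivial module
 * that timestamps its own load with the new NMI-safe TAI accessor.
 */
#include <linux/module.h>
#include <linux/timekeeping.h>

static int __init tai_stamp_init(void)
{
	/* Safe from any context; may race with a concurrent TAI offset update. */
	u64 tai_ns = ktime_get_tai_fast_ns();

	pr_info("loaded at TAI %llu ns\n", (unsigned long long)tai_ns);
	return 0;
}

static void __exit tai_stamp_exit(void)
{
}

module_init(tai_stamp_init);
module_exit(tai_stamp_exit);
MODULE_LICENSE("GPL");

As the new kerneldoc in kernel/time/timekeeping.c points out, the monotonic
time and the TAI offset are not read atomically, so a readout taken while the
TAI offset is being updated may be wrong; callers are expected to deal with
that in post processing.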
* tag 'timers-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
timers: Provide a better debugobjects hint for delayed works
time/sched_clock: Fix formatting of frequency reporting code
time/sched_clock: Use Hz as the unit for clock rate reporting below 4kHz
time/sched_clock: Round the frequency reported to nearest rather than down
timekeeping: Consolidate fast timekeeper
timekeeping: Annotate ktime_get_boot_fast_ns() with data_race()
timers/nohz: Switch to ONESHOT_STOPPED in the low-res handler when the tick is stopped
timekeeping: Introduce fast accessor to clock tai
tracing/timer: Add missing argument documentation of trace points
clocksource: Replace cpumask_weight() with cpumask_empty()
timers: Move timer sysctl into the timer code
clockevents: Use dedicated list iterator variable
timers: Simplify calc_index()
timers: Initialize base::next_expiry_recalc in timers_prepare_cpu()
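
The sched_clock entries above change how the registered clock rate is
reported: the division now rounds to the nearest unit via DIV_ROUND_CLOSEST()
and rates below 4 kHz are printed in plain Hz instead of a truncated kHz
value. Below is a standalone, userspace-style C sketch of the rounding
difference; DIV_ROUND_CLOSEST() is open-coded here so it compiles outside the
kernel (the kernel's own macro lives in include/linux/math.h, which the diff
adds to kernel/time/sched_clock.c).

/* Illustrative userspace sketch, not kernel code: truncating vs
 * round-to-nearest division when reporting a clock rate. */
#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

int main(void)
{
	unsigned long rate = 32768;	/* e.g. a 32.768 kHz counter */

	printf("truncated: %lu kHz\n", rate / 1000);			/* 32 kHz */
	printf("rounded  : %lu kHz\n", DIV_ROUND_CLOSEST(rate, 1000));	/* 33 kHz */
	return 0;
}

With the change, a 32768 Hz counter is reported as "33 kHz" rather than
"32 kHz", and anything below 4000 Hz is now printed in Hz so no precision is
lost to a coarse kHz readout.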
-rw-r--r--  Documentation/core-api/timekeeping.rst |  1
-rw-r--r--  include/linux/timekeeping.h            |  1
-rw-r--r--  include/linux/timer.h                  |  8
-rw-r--r--  include/trace/events/timer.h           |  5
-rw-r--r--  kernel/sysctl.c                        | 11
-rw-r--r--  kernel/time/clockevents.c              |  9
-rw-r--r--  kernel/time/clocksource.c              |  2
-rw-r--r--  kernel/time/sched_clock.c              | 13
-rw-r--r--  kernel/time/tick-sched.c               | 12
-rw-r--r--  kernel/time/timekeeping.c              | 39
-rw-r--r--  kernel/time/timer.c                    | 88
11 files changed, 127 insertions(+), 62 deletions(-)
diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 729e24864fe7..22ec68f24421 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
 .. c:function:: u64 ktime_get_mono_fast_ns( void )
 		u64 ktime_get_raw_fast_ns( void )
 		u64 ktime_get_boot_fast_ns( void )
+		u64 ktime_get_tai_fast_ns( void )
 		u64 ktime_get_real_fast_ns( void )
 
 These variants are safe to call from any context, including from
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 78a98bdff76d..fe1e467ba046 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -177,6 +177,7 @@ static inline u64 ktime_get_raw_ns(void)
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
 extern u64 ktime_get_boot_fast_ns(void);
+extern u64 ktime_get_tai_fast_ns(void);
 extern u64 ktime_get_real_fast_ns(void);
 
 /*
diff --git a/include/linux/timer.h b/include/linux/timer.h
index fda13c9d1256..648f00105f58 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -196,14 +196,6 @@ extern void init_timers(void);
 struct hrtimer;
 extern enum hrtimer_restart it_real_fn(struct hrtimer *);
 
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-struct ctl_table;
-
-extern unsigned int sysctl_timer_migration;
-int timer_migration_handler(struct ctl_table *table, int write,
-			    void *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
 unsigned long __round_jiffies(unsigned long j, int cpu);
 unsigned long __round_jiffies_relative(unsigned long j, int cpu);
 unsigned long round_jiffies(unsigned long j);
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 6ad031c71be7..2e713a7d9aa3 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -48,6 +48,7 @@ DEFINE_EVENT(timer_class, timer_init,
  * timer_start - called when the timer is started
  * @timer:	pointer to struct timer_list
  * @expires:	the timers expiry time
+ * @flags:	the timers flags
  */
 TRACE_EVENT(timer_start,
 
@@ -84,6 +85,7 @@ TRACE_EVENT(timer_start,
 /**
  * timer_expire_entry - called immediately before the timer callback
  * @timer:	pointer to struct timer_list
+ * @baseclk:	value of timer_base::clk when timer expires
  *
  * Allows to determine the timer latency.
  */
@@ -190,7 +192,8 @@ TRACE_EVENT(hrtimer_init,
 
 /**
  * hrtimer_start - called when the hrtimer is started
- * @hrtimer: pointer to struct hrtimer
+ * @hrtimer:	pointer to struct hrtimer
+ * @mode:	the hrtimers mode
  */
 TRACE_EVENT(hrtimer_start,
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 830aaf8ca08e..5b7b1a82ae6a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2288,17 +2288,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-	{
-		.procname	= "timer_migration",
-		.data		= &sysctl_timer_migration,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= timer_migration_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
 #ifdef CONFIG_BPF_SYSCALL
 	{
 		.procname	= "unprivileged_bpf_disabled",
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 003ccf338d20..5d85014d59b5 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -690,7 +690,7 @@ static ssize_t unbind_device_store(struct device *dev,
 {
 	char name[CS_NAME_LEN];
 	ssize_t ret = sysfs_get_uname(buf, name, count);
-	struct clock_event_device *ce;
+	struct clock_event_device *ce = NULL, *iter;
 
 	if (ret < 0)
 		return ret;
@@ -698,9 +698,10 @@ static ssize_t unbind_device_store(struct device *dev,
 	ret = -ENODEV;
 	mutex_lock(&clockevents_mutex);
 	raw_spin_lock_irq(&clockevents_lock);
-	list_for_each_entry(ce, &clockevent_devices, list) {
-		if (!strcmp(ce->name, name)) {
-			ret = __clockevents_try_unbind(ce, dev->id);
+	list_for_each_entry(iter, &clockevent_devices, list) {
+		if (!strcmp(iter->name, name)) {
+			ret = __clockevents_try_unbind(iter, dev->id);
+			ce = iter;
 			break;
 		}
 	}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 95d7ca35bdf2..cee5da1e54c4 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -343,7 +343,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
 	cpus_read_lock();
 	preempt_disable();
 	clocksource_verify_choose_cpus();
-	if (cpumask_weight(&cpus_chosen) == 0) {
+	if (cpumask_empty(&cpus_chosen)) {
 		preempt_enable();
 		cpus_read_unlock();
 		pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index b1b9b12899f5..8464c5acc913 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -8,6 +8,7 @@
 #include <linux/jiffies.h>
 #include <linux/ktime.h>
 #include <linux/kernel.h>
+#include <linux/math.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
@@ -199,15 +200,13 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 
 	r = rate;
 	if (r >= 4000000) {
-		r /= 1000000;
+		r = DIV_ROUND_CLOSEST(r, 1000000);
 		r_unit = 'M';
+	} else if (r >= 4000) {
+		r = DIV_ROUND_CLOSEST(r, 1000);
+		r_unit = 'k';
 	} else {
-		if (r >= 1000) {
-			r /= 1000;
-			r_unit = 'k';
-		} else {
-			r_unit = ' ';
-		}
+		r_unit = ' ';
 	}
 
 	/* Calculate the ns resolution of this counter */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d257721c68b8..58a11f859ac7 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -928,6 +928,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	if (unlikely(expires == KTIME_MAX)) {
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 			hrtimer_cancel(&ts->sched_timer);
+		else
+			tick_program_event(KTIME_MAX, 1);
 		return;
 	}
 
@@ -1364,9 +1366,15 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 	tick_sched_do_timer(ts, now);
 	tick_sched_handle(ts, regs);
 
-	/* No need to reprogram if we are running tickless */
-	if (unlikely(ts->tick_stopped))
+	if (unlikely(ts->tick_stopped)) {
+		/*
+		 * The clockevent device is not reprogrammed, so change the
+		 * clock event device to ONESHOT_STOPPED to avoid spurious
+		 * interrupts on devices which might not be truly one shot.
+		 */
+		tick_program_event(KTIME_MAX, 1);
 		return;
+	}
 
 	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
 	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3b1398fbddaf..4ab9949772d5 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -429,6 +429,14 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
 	memcpy(base + 1, base, sizeof(*base));
 }
 
+static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
+{
+	u64 delta, cycles = tk_clock_read(tkr);
+
+	delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+	return timekeeping_delta_to_ns(tkr, delta);
+}
+
 static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 {
 	struct tk_read_base *tkr;
@@ -439,12 +447,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 		seq = raw_read_seqcount_latch(&tkf->seq);
 		tkr = tkf->base + (seq & 0x01);
 		now = ktime_to_ns(tkr->base);
-
-		now += timekeeping_delta_to_ns(tkr,
-				clocksource_delta(
-					tk_clock_read(tkr),
-					tkr->cycle_last,
-					tkr->mask));
+		now += fast_tk_get_delta_ns(tkr);
 	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
 	return now;
@@ -528,10 +531,27 @@ u64 notrace ktime_get_boot_fast_ns(void)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 
-	return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
 }
 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
 
+/**
+ * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
+ *
+ * The same limitations as described for ktime_get_boot_fast_ns() apply. The
+ * mono time and the TAI offset are not read atomically which may yield wrong
+ * readouts. However, an update of the TAI offset is an rare event e.g., caused
+ * by settime or adjtimex with an offset. The user of this function has to deal
+ * with the possibility of wrong timestamps in post processing.
+ */
+u64 notrace ktime_get_tai_fast_ns(void)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+
+	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
+}
+EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
+
 static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
 {
 	struct tk_read_base *tkr;
@@ -543,10 +563,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
 		tkr = tkf->base + (seq & 0x01);
 		basem = ktime_to_ns(tkr->base);
 		baser = ktime_to_ns(tkr->base_real);
-
-		delta = timekeeping_delta_to_ns(tkr,
-				clocksource_delta(tk_clock_read(tkr),
-				tkr->cycle_last, tkr->mask));
+		delta = fast_tk_get_delta_ns(tkr);
 	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
 	if (mono)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 9dd2a39cb3b0..a0666d948147 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -44,6 +44,7 @@
 #include <linux/slab.h>
 #include <linux/compat.h>
 #include <linux/random.h>
+#include <linux/sysctl.h>
 #include <linux/uaccess.h>
 
 #include <asm/unistd.h>
@@ -223,7 +224,7 @@ static void timer_update_keys(struct work_struct *work);
 static DECLARE_WORK(timer_update_work, timer_update_keys);
 
 #ifdef CONFIG_SMP
-unsigned int sysctl_timer_migration = 1;
+static unsigned int sysctl_timer_migration = 1;
 
 DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);
 
@@ -234,7 +235,42 @@ static void timers_update_migration(void)
 	else
 		static_branch_disable(&timers_migration_enabled);
 }
-#else
+
+#ifdef CONFIG_SYSCTL
+static int timer_migration_handler(struct ctl_table *table, int write,
+				   void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	mutex_lock(&timer_keys_mutex);
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (!ret && write)
+		timers_update_migration();
+	mutex_unlock(&timer_keys_mutex);
+	return ret;
+}
+
+static struct ctl_table timer_sysctl[] = {
+	{
+		.procname	= "timer_migration",
+		.data		= &sysctl_timer_migration,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= timer_migration_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{}
+};
+
+static int __init timer_sysctl_init(void)
+{
+	register_sysctl("kernel", timer_sysctl);
+	return 0;
+}
+device_initcall(timer_sysctl_init);
+#endif /* CONFIG_SYSCTL */
+#else /* CONFIG_SMP */
 static inline void timers_update_migration(void) { }
 #endif /* !CONFIG_SMP */
 
@@ -251,19 +287,6 @@ void timers_update_nohz(void)
 	schedule_work(&timer_update_work);
 }
 
-int timer_migration_handler(struct ctl_table *table, int write,
-			    void *buffer, size_t *lenp, loff_t *ppos)
-{
-	int ret;
-
-	mutex_lock(&timer_keys_mutex);
-	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-	if (!ret && write)
-		timers_update_migration();
-	mutex_unlock(&timer_keys_mutex);
-	return ret;
-}
-
 static inline bool is_timers_nohz_active(void)
 {
 	return static_branch_unlikely(&timers_nohz_active);
 }
@@ -502,7 +525,7 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
 	 *
	 * Round up with level granularity to prevent this.
 	 */
-	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
+	expires = (expires >> LVL_SHIFT(lvl)) + 1;
 	*bucket_expiry = expires << LVL_SHIFT(lvl);
 	return LVL_OFFS(lvl) + (expires & LVL_MASK);
 }
@@ -615,9 +638,39 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer)
 
 static const struct debug_obj_descr timer_debug_descr;
 
+struct timer_hint {
+	void	(*function)(struct timer_list *t);
+	long	offset;
+};
+
+#define TIMER_HINT(fn, container, timr, hintfn)		\
+	{						\
+		.function = fn,				\
+		.offset	  = offsetof(container, hintfn) - \
+			    offsetof(container, timr)	\
+	}
+
+static const struct timer_hint timer_hints[] = {
+	TIMER_HINT(delayed_work_timer_fn,
+		   struct delayed_work, timer, work.func),
+	TIMER_HINT(kthread_delayed_work_timer_fn,
+		   struct kthread_delayed_work, timer, work.func),
+};
+
 static void *timer_debug_hint(void *addr)
 {
-	return ((struct timer_list *) addr)->function;
+	struct timer_list *timer = addr;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(timer_hints); i++) {
+		if (timer_hints[i].function == timer->function) {
+			void (**fn)(void) = addr + timer_hints[i].offset;
+
+			return *fn;
+		}
+	}
+
+	return timer->function;
 }
 
 static bool timer_is_static_object(void *addr)
@@ -1953,6 +2006,7 @@ int timers_prepare_cpu(unsigned int cpu)
 		base = per_cpu_ptr(&timer_bases[b], cpu);
 		base->clk = jiffies;
 		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+		base->next_expiry_recalc = false;
 		base->timers_pending = false;
 		base->is_idle = false;
 	}