diff options
author | Heiko Carstens <hca@linux.ibm.com> | 2020-12-04 12:32:42 +0100 |
---|---|---|
committer | Heiko Carstens <hca@linux.ibm.com> | 2020-12-16 14:55:49 +0100 |
commit | dd6cfe553212b6311cac27ecd2f67621857fb10a (patch) | |
tree | 75a5501adcd811dd9647cd7835d98fafdee32b8c /arch/s390/lib | |
parent | 91c2bad6aec98a51663ce9eec7c4825c1ca0b870 (diff) |
s390/delay: simplify udelay
udelay is implemented by using quite subtle details to make it
possible to load an idle psw and waiting for an interrupt even in irq
context or when interrupts are disabled. Also handling (or better: no
handling) of softirqs is taken into account.
All this is done to optimize for something which should in normal
circumstances never happen: calling udelay to busy wait. Therefore get
rid of the whole complexity and just busy loop like other
architectures are doing it also.
It could have been possible to use diag 0x44 instead of cpu_relax() in
the busy loop, however we have seen too many bad things happen with
diag 0x44 that it seems to be better to simply busy loop.
Also note that with this new implementation kernel preemption does
work when within the udelay loop. This did not work before.
To get a feeling what the former code optimizes for: IPL'ing a kernel
with 'defconfig' and afterwards compiling a kernel ends with a total
of zero udelay calls.
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390/lib')
-rw-r--r-- | arch/s390/lib/delay.c | 99 |
1 files changed, 17 insertions, 82 deletions
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 68d61f2835df..928b1dbe182c 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -39,79 +39,24 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -static void __udelay_disabled(unsigned long long usecs) +static void delay_loop(unsigned long delta, bool simple) { - unsigned long cr0, cr0_new, psw_mask; - struct s390_idle_data idle; - u64 end; + unsigned long end; - end = get_tod_clock() + (usecs << 12); - __ctl_store(cr0, 0, 0); - cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK; - cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */ - __ctl_load(cr0_new, 0, 0); - psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; - set_clock_comparator(end); - set_cpu_flag(CIF_IGNORE_IRQ); - psw_idle(&idle, psw_mask); - trace_hardirqs_off(); - clear_cpu_flag(CIF_IGNORE_IRQ); - set_clock_comparator(S390_lowcore.clock_comparator); - __ctl_load(cr0, 0, 0); + if (static_branch_likely(&udelay_ready) && !simple) { + end = get_tod_clock_monotonic() + delta; + while (!tod_after(get_tod_clock_monotonic(), end)) + cpu_relax(); + } else { + end = get_tod_clock() + delta; + while (!tod_after(get_tod_clock(), end)) + cpu_relax(); + } } -static void __udelay_enabled(unsigned long long usecs) +void __udelay(unsigned long usecs) { - u64 clock_saved, end; - - end = get_tod_clock_fast() + (usecs << 12); - do { - clock_saved = 0; - if (tod_after(S390_lowcore.clock_comparator, end)) { - clock_saved = local_tick_disable(); - set_clock_comparator(end); - } - enabled_wait(); - if (clock_saved) - local_tick_enable(clock_saved); - } while (get_tod_clock_fast() < end); -} - -/* - * Waits for 'usecs' microseconds using the TOD clock comparator. - */ -void __udelay(unsigned long long usecs) -{ - unsigned long flags; - - if (!static_branch_likely(&udelay_ready)) { - udelay_simple(usecs); - return; - } - - preempt_disable(); - local_irq_save(flags); - if (in_irq()) { - __udelay_disabled(usecs); - goto out; - } - if (in_softirq()) { - if (raw_irqs_disabled_flags(flags)) - __udelay_disabled(usecs); - else - __udelay_enabled(usecs); - goto out; - } - if (raw_irqs_disabled_flags(flags)) { - local_bh_disable(); - __udelay_disabled(usecs); - _local_bh_enable(); - goto out; - } - __udelay_enabled(usecs); -out: - local_irq_restore(flags); - preempt_enable(); + delay_loop(usecs << 12, 0); } EXPORT_SYMBOL(__udelay); @@ -119,25 +64,15 @@ EXPORT_SYMBOL(__udelay); * Simple udelay variant. To be used on startup and reboot * when the interrupt handler isn't working. */ -void udelay_simple(unsigned long long usecs) +void udelay_simple(unsigned long usecs) { - u64 end; - - end = get_tod_clock_fast() + (usecs << 12); - while (get_tod_clock_fast() < end) - cpu_relax(); + delay_loop(usecs << 12, 1); } -void __ndelay(unsigned long long nsecs) +void __ndelay(unsigned long nsecs) { - u64 end; - nsecs <<= 9; do_div(nsecs, 125); - end = get_tod_clock_fast() + nsecs; - if (nsecs & ~0xfffUL) - __udelay(nsecs >> 12); - while (get_tod_clock_fast() < end) - barrier(); + delay_loop(nsecs, 0); } EXPORT_SYMBOL(__ndelay); |