diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 13:59:56 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 13:59:56 -0700 |
commit | 9244724fbf8ab394a7210e8e93bf037abc859514 (patch) | |
tree | 95c2b9caf65ac531b6649247e99dc554e3bca96c /arch/mips | |
parent | 7cffdbe3607a6cc2dc02d135e13732ec36bc4e28 (diff) | |
parent | bf5a8c26ad7caf0772a1cd48c8a0924e48bdbaf0 (diff) |
Merge tag 'smp-core-2023-06-26' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull SMP updates from Thomas Gleixner:
"A large update for SMP management:
- Parallel CPU bringup
The reason why people are interested in parallel bringup is to
shorten the (kexec) reboot time of cloud servers to reduce the
downtime of the VM tenants.
The current fully serialized bringup does the following per AP:
1) Prepare callbacks (allocate, intialize, create threads)
2) Kick the AP alive (e.g. INIT/SIPI on x86)
3) Wait for the AP to report alive state
4) Let the AP continue through the atomic bringup
5) Let the AP run the threaded bringup to full online state
There are two significant delays:
#3 The time for an AP to report alive state in start_secondary()
on x86 has been measured in the range between 350us and 3.5ms
depending on vendor and CPU type, BIOS microcode size etc.
#4 The atomic bringup does the microcode update. This has been
measured to take up to ~8ms on the primary threads depending
on the microcode patch size to apply.
On a two socket SKL server with 56 cores (112 threads) the boot CPU
spends on current mainline about 800ms busy waiting for the APs to
come up and apply microcode. That's more than 80% of the actual
onlining procedure.
This can be reduced significantly by splitting the bringup
mechanism into two parts:
1) Run the prepare callbacks and kick the AP alive for each AP
which needs to be brought up.
The APs wake up, do their firmware initialization and run the
low level kernel startup code including microcode loading in
parallel up to the first synchronization point. (#1 and #2
above)
2) Run the rest of the bringup code strictly serialized per CPU
(#3 - #5 above) as it's done today.
Parallelizing that stage of the CPU bringup might be possible
in theory, but it's questionable whether required surgery
would be justified for a pretty small gain.
If the system is large enough the first AP is already waiting at
the first synchronization point when the boot CPU finished the
wake-up of the last AP. That reduces the AP bringup time on that
SKL from ~800ms to ~80ms, i.e. by a factor ~10x.
The actual gain varies wildly depending on the system, CPU,
microcode patch size and other factors. There are some
opportunities to reduce the overhead further, but that needs some
deep surgery in the x86 CPU bringup code.
For now this is only enabled on x86, but the core functionality
obviously works for all SMP capable architectures.
- Enhancements for SMP function call tracing so it is possible to
locate the scheduling and the actual execution points. That allows
to measure IPI delivery time precisely"
* tag 'smp-core-2023-06-26' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
trace,smp: Add tracepoints for scheduling remotelly called functions
trace,smp: Add tracepoints around remotelly called functions
MAINTAINERS: Add CPU HOTPLUG entry
x86/smpboot: Fix the parallel bringup decision
x86/realmode: Make stack lock work in trampoline_compat()
x86/smp: Initialize cpu_primary_thread_mask late
cpu/hotplug: Fix off by one in cpuhp_bringup_mask()
x86/apic: Fix use of X{,2}APIC_ENABLE in asm with older binutils
x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it
x86/smpboot: Support parallel startup of secondary CPUs
x86/smpboot: Implement a bit spinlock to protect the realmode stack
x86/apic: Save the APIC virtual base address
cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE
x86/apic: Provide cpu_primary_thread mask
x86/smpboot: Enable split CPU startup
cpu/hotplug: Provide a split up CPUHP_BRINGUP mechanism
cpu/hotplug: Reset task stack state in _cpu_up()
cpu/hotplug: Remove unused state functions
riscv: Switch to hotplug core state synchronization
parisc: Switch to hotplug core state synchronization
...
Diffstat (limited to 'arch/mips')
-rw-r--r-- | arch/mips/Kconfig | 1 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/smp.c | 1 | ||||
-rw-r--r-- | arch/mips/include/asm/smp-ops.h | 1 | ||||
-rw-r--r-- | arch/mips/kernel/smp-bmips.c | 1 | ||||
-rw-r--r-- | arch/mips/kernel/smp-cps.c | 14 | ||||
-rw-r--r-- | arch/mips/kernel/smp.c | 8 | ||||
-rw-r--r-- | arch/mips/loongson64/smp.c | 1 |
7 files changed, 18 insertions, 9 deletions
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c278bf20049f..ada18f3be229 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2287,6 +2287,7 @@ config MIPS_CPS select MIPS_CM select MIPS_CPS_PM if HOTPLUG_CPU select SMP + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select SYNC_R4K if (CEVT_R4K || CSRC_R4K) select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6 diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4212584e6efa..33c09688210f 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -345,6 +345,7 @@ void play_dead(void) int cpu = cpu_number_map(cvmx_get_core_num()); idle_task_exit(); + cpuhp_ap_report_dead(); octeon_processor_boot = 0xff; per_cpu(cpu_state, cpu) = CPU_DEAD; diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 0145bbfb5efb..5719ff49eff1 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -33,6 +33,7 @@ struct plat_smp_ops { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); + void (*cleanup_dead_cpu)(unsigned cpu); #endif #ifdef CONFIG_KEXEC void (*kexec_nonboot_cpu)(void); diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 15466d4cf4a0..c074ecce3fbf 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -392,6 +392,7 @@ static void bmips_cpu_die(unsigned int cpu) void __ref play_dead(void) { idle_task_exit(); + cpuhp_ap_report_dead(); /* flush data cache */ _dma_cache_wback_inv(0, ~0); diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 62f677b2306f..d7fdbec232da 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -503,8 +503,7 @@ void play_dead(void) } } - /* This CPU has chosen its way out */ - (void)cpu_report_death(); + cpuhp_ap_report_dead(); cps_shutdown_this_cpu(cpu_death); @@ -527,7 +526,9 @@ static void wait_for_sibling_halt(void *ptr_cpu) } while (!(halted & TCHALT_H)); } -static void cps_cpu_die(unsigned int cpu) +static void cps_cpu_die(unsigned int cpu) { } + +static void cps_cleanup_dead_cpu(unsigned cpu) { unsigned core = cpu_core(&cpu_data[cpu]); unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]); @@ -535,12 +536,6 @@ static void cps_cpu_die(unsigned int cpu) unsigned stat; int err; - /* Wait for the cpu to choose its way out */ - if (!cpu_wait_death(cpu, 5)) { - pr_err("CPU%u: didn't offline\n", cpu); - return; - } - /* * Now wait for the CPU to actually offline. Without doing this that * offlining may race with one or more of: @@ -624,6 +619,7 @@ static const struct plat_smp_ops cps_smp_ops = { #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cps_cpu_disable, .cpu_die = cps_cpu_die, + .cleanup_dead_cpu = cps_cleanup_dead_cpu, #endif #ifdef CONFIG_KEXEC .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 1d93b85271ba..90c71d800b59 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -690,6 +690,14 @@ void flush_tlb_one(unsigned long vaddr) EXPORT_SYMBOL(flush_tlb_page); EXPORT_SYMBOL(flush_tlb_one); +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ + if (mp_ops->cleanup_dead_cpu) + mp_ops->cleanup_dead_cpu(cpu); +} +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static void tick_broadcast_callee(void *info) diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index b0e8bb9fa036..cdecd7af11a6 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -775,6 +775,7 @@ void play_dead(void) void (*play_dead_at_ckseg1)(int *); idle_task_exit(); + cpuhp_ap_report_dead(); prid_imp = read_c0_prid() & PRID_IMP_MASK; prid_rev = read_c0_prid() & PRID_REV_MASK; |