From ea53069231f9317062910d6e772cca4ce93de8c8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 17 Sep 2010 15:39:11 -0700 Subject: x86, hotplug: Use mwait to offline a processor, fix the legacy case The code in native_play_dead() has a number of problems: 1. We should use MWAIT when available, to put ourselves into a deeper sleep state. 2. We use the existence of CLFLUSH to determine if WBINVD is safe, but that is totally bogus -- WBINVD is 486+, whereas CLFLUSH is a much later addition. 3. We should do WBINVD inside the loop, just in case of something like setting an A bit on page tables. Pointed out by Arjan van de Ven. This code is based in part of a previous patch by Venki Pallipadi, but unlike that patch this one keeps all the detection code local instead of pre-caching a bunch of information. We're shutting down the CPU; there is absolutely no hurry. This patch moves all the code to C and deletes the global wbinvd_halt() which is broken anyway. Originally-by: Venkatesh Pallipadi Signed-off-by: H. Peter Anvin Reviewed-by: Arjan van de Ven Cc: Len Brown Cc: Venkatesh Pallipadi Cc: Peter Zijlstra LKML-Reference: <20090522232230.162239000@intel.com> --- arch/x86/kernel/smpboot.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8b3bfc4dd708..07bf4233441d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -1383,11 +1384,71 @@ void play_dead_common(void) local_irq_disable(); } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. 
+ */ +static inline void mwait_play_dead(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + + if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT)) + return; + if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return; + + eax = CPUID_MWAIT_LEAF; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + + /* + * eax will be 0 if EDX enumeration is not valid. + * Initialized below to cstate, sub_cstate value when EDX is valid. + */ + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { + eax = 0; + } else { + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + } + + while (1) { + mb(); + wbinvd(); + __monitor(&current_thread_info()->flags, 0, 0); + mb(); + __mwait(eax, 0); + } +} + +static inline void hlt_play_dead(void) +{ + while (1) { + mb(); + if (current_cpu_data.x86 >= 4) + wbinvd(); + mb(); + native_halt(); + } +} + void native_play_dead(void) { play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - wbinvd_halt(); + + mwait_play_dead(); /* Only returns on failure */ + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From a68e5c94f7d3dd64fef34dd5d97e365cae4bb42a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 17 Sep 2010 17:06:46 -0700 Subject: x86, hotplug: Move WBINVD back outside the play_dead loop On processors with hyperthreading, when only one thread is offlined the other thread can cause a spurious wakeup on the idled thread. We do not want to re-WBINVD when that happens. Ideally, we should simply skip WBINVD unless we're the last thread on a particular core to shut down, but there might be similar issues elsewhere in the system. Thus, revert to previous behavior of only WBINVD outside the loop. 
Partly as a result, remove the mb()'s around it: they are not necessary since wbinvd() is a serializing instruction, but they were intended to make sure the compiler didn't do any funny loop optimizations. Reported-by: Asit Mallick Signed-off-by: H. Peter Anvin Cc: Arjan van de Ven Cc: Len Brown Cc: Venkatesh Pallipadi Cc: Peter Zijlstra LKML-Reference: --- arch/x86/kernel/smpboot.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 07bf4233441d..55c80ffb8719 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1422,9 +1422,9 @@ static inline void mwait_play_dead(void) (highest_subcstate - 1); } + wbinvd(); + while (1) { - mb(); - wbinvd(); __monitor(&current_thread_info()->flags, 0, 0); mb(); __mwait(eax, 0); @@ -1433,11 +1433,10 @@ static inline void mwait_play_dead(void) static inline void hlt_play_dead(void) { + if (current_cpu_data.x86 >= 4) + wbinvd(); + while (1) { - mb(); - if (current_cpu_data.x86 >= 4) - wbinvd(); - mb(); native_halt(); } } -- cgit v1.2.3 From ce5f68246bf2385d6174856708d0b746dc378f20 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 20 Sep 2010 13:04:45 -0700 Subject: x86, hotplug: In the MWAIT case of play_dead, CLFLUSH the cache line When we're using MWAIT for play_dead, explicitly CLFLUSH the cache line before executing MONITOR. This is a potential workaround for the Xeon 7400 erratum AAI65 after having a spurious wakeup and returning around the loop. "Potential" here because it is not certain that that erratum could actually trigger; however, the CLFLUSH should be harmless. Signed-off-by: H. 
Peter Anvin Acked-by: Venkatesh Pallipadi Cc: Asit Mallick Cc: Arjan van de Ven Cc: Len Brown --- arch/x86/kernel/smpboot.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 55c80ffb8719..fdccfe9dc63d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1394,9 +1394,12 @@ static inline void mwait_play_dead(void) unsigned int highest_cstate = 0; unsigned int highest_subcstate = 0; int i; + void *mwait_ptr; if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT)) return; + if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH)) + return; if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) return; @@ -1422,10 +1425,25 @@ static inline void mwait_play_dead(void) (highest_subcstate - 1); } + /* + * This should be a memory location in a cache line which is + * unlikely to be touched by other processors. The actual + * content is immaterial as it is not actually modified in any way. + */ + mwait_ptr = &current_thread_info()->flags; + wbinvd(); while (1) { - __monitor(&current_thread_info()->flags, 0, 0); + /* + * The CLFLUSH is a workaround for erratum AAI65 for + * the Xeon 7400 series. It's not clear it is actually + * needed, but it should be harmless in either case. + * The WBINVD is insufficient due to the spurious-wakeup + * case where we return around the loop. + */ + clflush(mwait_ptr); + __monitor(mwait_ptr, 0, 0); mb(); __mwait(eax, 0); } -- cgit v1.2.3