From 02f037d641dc6672be5cfe7875a48ab99b95b154 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:41:57 -0600 Subject: x86/mm/pat: Add support of non-default PAT MSR setting In preparation for fixing a regression caused by: 9cd25aac1f44 ("x86/mm/pat: Emulate PAT when it is disabled")' ... PAT needs to support a case that PAT MSR is initialized with a non-default value. When pat_init() is called and PAT is disabled, it initializes the PAT table with the BIOS default value. Xen, however, sets PAT MSR with a non-default value to enable WC. This causes inconsistency between the PAT table and PAT MSR when PAT is set to disable on Xen. Change pat_init() to handle the PAT disable cases properly. Add init_cache_modes() to handle two cases when PAT is set to disable. 1. CPU supports PAT: Set PAT table to be consistent with PAT MSR. 2. CPU does not support PAT: Set PAT table to be consistent with PWT and PCD bits in a PTE. Note, __init_cache_modes(), renamed from pat_init_cache_modes(), will be changed to a static function in a later patch. Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Toshi Kani Cc: elliott@hpe.com Cc: konrad.wilk@oracle.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-2-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 2 +- arch/x86/mm/pat.c | 73 +++++++++++++++++++++++++++++++++------------- arch/x86/xen/enlighten.c | 2 +- 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index ca6c228d5e62..97ea55bc2b54 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -6,7 +6,7 @@ bool pat_enabled(void); extern void pat_init(void); -void pat_init_cache_modes(u64); +void __init_cache_modes(u64); extern int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index faec01e7a17d..b4663885308f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -181,7 +181,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) * configuration. * Using lower indices is preferred, so we start with highest index. */ -void pat_init_cache_modes(u64 pat) +void __init_cache_modes(u64 pat) { enum page_cache_mode cache; char pat_msg[33]; @@ -207,9 +207,6 @@ static void pat_bsp_init(u64 pat) return; } - if (!pat_enabled()) - goto done; - rdmsrl(MSR_IA32_CR_PAT, tmp_pat); if (!tmp_pat) { pat_disable("PAT MSR is 0, disabled."); @@ -218,15 +215,11 @@ static void pat_bsp_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); -done: - pat_init_cache_modes(pat); + __init_cache_modes(pat); } static void pat_ap_init(u64 pat) { - if (!pat_enabled()) - return; - if (!cpu_has_pat) { /* * If this happens we are on a secondary CPU, but switched to @@ -238,18 +231,32 @@ static void pat_ap_init(u64 pat) wrmsrl(MSR_IA32_CR_PAT, pat); } -void pat_init(void) +static void init_cache_modes(void) { - u64 pat; - struct cpuinfo_x86 *c = &boot_cpu_data; + u64 pat = 0; + static int init_cm_done; - if (!pat_enabled()) { + if (init_cm_done) + return; + + if (boot_cpu_has(X86_FEATURE_PAT)) { + /* + * CPU supports PAT. Set PAT table to be consistent with + * PAT MSR. 
This case supports "nopat" boot option, and + * virtual machine environments which support PAT without + * MTRRs. In specific, Xen has unique setup to PAT MSR. + * + * If PAT MSR returns 0, it is considered invalid and emulates + * as No PAT. + */ + rdmsrl(MSR_IA32_CR_PAT, pat); + } + + if (!pat) { /* * No PAT. Emulate the PAT table that corresponds to the two - * cache bits, PWT (Write Through) and PCD (Cache Disable). This - * setup is the same as the BIOS default setup when the system - * has PAT but the "nopat" boot option has been specified. This - * emulated PAT table is used when MSR_IA32_CR_PAT returns 0. + * cache bits, PWT (Write Through) and PCD (Cache Disable). + * This setup is also the same as the BIOS default setup. * * PTE encoding: * @@ -266,10 +273,36 @@ void pat_init(void) */ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); + } + + __init_cache_modes(pat); + + init_cm_done = 1; +} + +/** + * pat_init - Initialize PAT MSR and PAT table + * + * This function initializes PAT MSR and PAT table with an OS-defined value + * to enable additional cache attributes, WC and WT. + * + * This function must be called on all CPUs using the specific sequence of + * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this + * procedure for PAT. + */ +void pat_init(void) +{ + u64 pat; + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (!pat_enabled()) { + init_cache_modes(); + return; + } - } else if ((c->x86_vendor == X86_VENDOR_INTEL) && - (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || - ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { + if ((c->x86_vendor == X86_VENDOR_INTEL) && + (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || + ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { /* * PAT support with the lower four entries. Intel Pentium 2, * 3, M, and 4 are affected by PAT errata, which makes the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 880862c7d9dd..c469a7c7c309 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1623,7 +1623,7 @@ asmlinkage __visible void __init xen_start_kernel(void) * configuration. */ rdmsrl(MSR_IA32_CR_PAT, pat); - pat_init_cache_modes(pat); + __init_cache_modes(pat); /* keep using Xen gdt for now; no urgent need to change it */ -- cgit v1.2.3 From 224bb1e5d67ba0f2872c98002d6a6f991ac6fd4a Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:41:58 -0600 Subject: x86/mm/pat: Add pat_disable() interface In preparation for fixing a regression caused by: 9cd25aac1f44 ("x86/mm/pat: Emulate PAT when it is disabled") ... PAT needs to provide an interface that prevents the OS from initializing the PAT MSR. PAT MSR initialization must be done on all CPUs using the specific sequence of operations defined in the Intel SDM. This requires MTRRs to be enabled since pat_init() is called as part of MTRR init from mtrr_rendezvous_handler(). Make pat_disable() as the interface that prevents the OS from initializing the PAT MSR. MTRR will call this interface when it cannot provide the SDM-defined sequence to initialize PAT. This also assures that pat_disable() called from pat_bsp_init() will set the PAT table properly when CPU does not support PAT. Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Robert Elliott Cc: Toshi Kani Cc: konrad.wilk@oracle.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-3-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 1 + arch/x86/mm/pat.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 97ea55bc2b54..0ad356c066ef 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -5,6 +5,7 @@ #include bool pat_enabled(void); +void pat_disable(const char *reason); extern void pat_init(void); void __init_cache_modes(u64); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index b4663885308f..1cc1d37f1de7 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -40,11 +40,22 @@ static bool boot_cpu_done; static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); +static void init_cache_modes(void); -static inline void pat_disable(const char *reason) +void pat_disable(const char *reason) { + if (!__pat_enabled) + return; + + if (boot_cpu_done) { + WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); + return; + } + __pat_enabled = 0; pr_info("x86/PAT: %s\n", reason); + + init_cache_modes(); } static int __init nopat(char *str) -- cgit v1.2.3 From d63dcf49cf5ae5605f4d14229e3888e104f294b1 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:41:59 -0600 Subject: x86/mm/pat: Replace cpu_has_pat with boot_cpu_has() Borislav Petkov suggested: > Please use on init paths boot_cpu_has(X86_FEATURE_PAT) and on fast > paths static_cpu_has(X86_FEATURE_PAT). No more of that cpu_has_XXX > ugliness. Replace the use of cpu_has_pat on init paths with boot_cpu_has(). Suggested-by: Borislav Petkov Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Robert Elliott Cc: Toshi Kani Cc: konrad.wilk@oracle.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-4-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 1cc1d37f1de7..59ec038b9833 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -213,7 +213,7 @@ static void pat_bsp_init(u64 pat) { u64 tmp_pat; - if (!cpu_has_pat) { + if (!boot_cpu_has(X86_FEATURE_PAT)) { pat_disable("PAT not supported by CPU."); return; } @@ -231,7 +231,7 @@ static void pat_bsp_init(u64 pat) static void pat_ap_init(u64 pat) { - if (!cpu_has_pat) { + if (!boot_cpu_has(X86_FEATURE_PAT)) { /* * If this happens we are on a secondary CPU, but switched to * PAT on the boot CPU. We have no way to undo PAT. -- cgit v1.2.3 From edfe63ec97ed8d4496225f7ba54c9ce4207c5431 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:42:00 -0600 Subject: x86/mtrr: Fix Xorg crashes in Qemu sessions A Xorg failure on qemu32 was reported as a regression [1] caused by commit 9cd25aac1f44 ("x86/mm/pat: Emulate PAT when it is disabled"). This patch fixes the Xorg crash. 
Negative effects of this regression were the following two failures [2] in Xorg on QEMU with the QEMU CPU model "qemu32" (-cpu qemu32), which were triggered by the fact that its virtual CPU does not support MTRRs.

#1. copy_process() failed in the check in reserve_pfn_range():

    copy_process
      copy_mm
        dup_mm
          dup_mmap
            copy_page_range
              track_pfn_copy
                reserve_pfn_range

A WC map request was tracked as WC in memtype, which set a PTE as UC (pgprot) per __cachemode2pte_tbl[]. This led to this error in reserve_pfn_range() called from track_pfn_copy(), which obtained a pgprot from a PTE. It converts pgprot to page_cache_mode, which does not necessarily result in the original page_cache_mode, since __cachemode2pte_tbl[] redirects multiple types to UC.

#2. The error path in copy_process() then hit the WARN_ON_ONCE in untrack_pfn():

    x86/PAT: Xorg:509 map pfn expected mapping type uncached-minus for [mem 0xfd000000-0xfdffffff], got write-combining
    Call Trace:
     dump_stack
     warn_slowpath_common
     ? untrack_pfn
     ? untrack_pfn
     warn_slowpath_null
     untrack_pfn
     ? __kunmap_atomic
     unmap_single_vma
     ? pagevec_move_tail_fn
     unmap_vmas
     exit_mmap
     mmput
     copy_process.part.47
     _do_fork
     SyS_clone
     do_syscall_32_irqs_on
     entry_INT80_32

These negative effects are caused by two separate bugs, which can be addressed in separate patches. Fixing the pat_init() issue described below addresses the root cause and keeps Xorg from hitting these cases.

When the CPU does not support MTRRs, MTRR does not call pat_init(), which leaves PAT enabled without initializing PAT. This pat_init() issue is long-standing, but it manifested as issue #1 (and then hit issue #2) with the above-mentioned commit because memtype now tracks the cache attribute with 'page_cache_mode'. The pat_init() issue existed before that commit, but memtype used pgprot then, so issue #1 did not occur. A WC request did, however, result in WT in effect, because a WC pgprot is actually WT when PAT is not initialized. This is not how it was designed to work: when PAT is properly set to disabled, WC is converted to UC. The use of WT can result in a system crash if the target range does not support WT. Fortunately, nobody ran into such an issue before.

To fix this pat_init() issue, the PAT code has been enhanced to provide the pat_disable() interface. Call this interface when MTRRs are disabled. By properly disabling PAT, the memtype check is bypassed and issue #1 is avoided.

[1]: https://lkml.org/lkml/2016/3/3/828
[2]: https://lkml.org/lkml/2016/3/4/775

Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R.
Rodriguez Cc: Peter Zijlstra Cc: Toshi Kani Cc: elliott@hpe.com Cc: konrad.wilk@oracle.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-5-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 6 +++++- arch/x86/kernel/cpu/mtrr/main.c | 10 +++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index b94f6f64e23d..dbff1456d215 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -24,6 +24,7 @@ #define _ASM_X86_MTRR_H #include +#include /* @@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { } +static inline void mtrr_bp_init(void) +{ + pat_disable("MTRRs disabled, skipping PAT initialization too."); +} #define mtrr_ap_init() do {} while (0) -#define mtrr_bp_init() do {} while (0) #define set_mtrr_aps_delayed_init() do {} while (0) #define mtrr_aps_init() do {} while (0) #define mtrr_bp_restore() do {} while (0) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 10f8d4796240..8b1947ba3e62 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -759,8 +759,16 @@ void __init mtrr_bp_init(void) } } - if (!mtrr_enabled()) + if (!mtrr_enabled()) { pr_info("MTRR: Disabled\n"); + + /* + * PAT initialization relies on MTRR's rendezvous handler. + * Skip PAT init until the handler can initialize both + * features independently. + */ + pat_disable("MTRRs disabled, skipping PAT initialization too."); + } } void mtrr_ap_init(void) -- cgit v1.2.3 From ad025a73f0e9344ac73ffe1b74c184033e08e7d5 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:42:01 -0600 Subject: x86/mtrr: Fix PAT init handling when MTRR is disabled get_mtrr_state() calls pat_init() on BSP even if MTRR is disabled. This results in calling pat_init() on BSP only since APs do not call pat_init() when MTRR is disabled. This inconsistency between BSP and APs leads to undefined behavior. Make BSP's calling condition to pat_init() consistent with AP's, mtrr_ap_init() and mtrr_aps_init(). Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Toshi Kani Cc: elliott@hpe.com Cc: konrad.wilk@oracle.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-6-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 24 ++++++++++++++---------- arch/x86/kernel/cpu/mtrr/main.c | 3 +++ arch/x86/kernel/cpu/mtrr/mtrr.h | 1 + 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 19f57360dfd2..8d7a29ed9377 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -444,11 +444,24 @@ static void __init print_mtrr_state(void) pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); } +/* PAT setup for BP. 
We need to go through sync steps here */ +void __init mtrr_bp_pat_init(void) +{ + unsigned long flags; + + local_irq_save(flags); + prepare_set(); + + pat_init(); + + post_set(); + local_irq_restore(flags); +} + /* Grab all of the MTRR state for this CPU into *state */ bool __init get_mtrr_state(void) { struct mtrr_var_range *vrs; - unsigned long flags; unsigned lo, dummy; unsigned int i; @@ -481,15 +494,6 @@ bool __init get_mtrr_state(void) mtrr_state_set = 1; - /* PAT setup for BP. We need to go through sync steps here */ - local_irq_save(flags); - prepare_set(); - - pat_init(); - - post_set(); - local_irq_restore(flags); - return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED); } diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8b1947ba3e62..7d393ecdeee6 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -752,6 +752,9 @@ void __init mtrr_bp_init(void) /* BIOS may override */ __mtrr_enabled = get_mtrr_state(); + if (mtrr_enabled()) + mtrr_bp_pat_init(); + if (mtrr_cleanup(phys_addr)) { changed_by_mtrr_cleanup = 1; mtrr_if->set_all(); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 951884dcc433..6c7ced07d16d 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); bool get_mtrr_state(void); +void mtrr_bp_pat_init(void); extern void set_mtrr_ops(const struct mtrr_ops *ops); -- cgit v1.2.3 From 88ba281108ed0c25c9d292b48bd3f272fcb90dd0 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:42:02 -0600 Subject: x86/xen, pat: Remove PAT table init code from Xen Xen supports PAT without MTRRs for its guests. In order to enable WC attribute, it was necessary for xen_start_kernel() to call pat_init_cache_modes() to update PAT table before starting guest kernel. Now that the kernel initializes PAT table to the BIOS handoff state when MTRR is disabled, this Xen-specific PAT init code is no longer necessary. Delete it from xen_start_kernel(). Also change __init_cache_modes() to a static function since PAT table should not be tweaked by other modules. Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Acked-by: Juergen Gross Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Toshi Kani Cc: elliott@hpe.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-7-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 1 - arch/x86/mm/pat.c | 2 +- arch/x86/xen/enlighten.c | 9 --------- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 0ad356c066ef..0b1ff4c1c14e 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -7,7 +7,6 @@ bool pat_enabled(void); void pat_disable(const char *reason); extern void pat_init(void); -void __init_cache_modes(u64); extern int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 59ec038b9833..c4c3ddcc9069 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -192,7 +192,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) * configuration. * Using lower indices is preferred, so we start with highest index. */ -void __init_cache_modes(u64 pat) +static void __init_cache_modes(u64 pat) { enum page_cache_mode cache; char pat_msg[33]; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c469a7c7c309..d8cca75e3b3e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,7 +75,6 @@ #include #include #include -#include #include #ifdef CONFIG_ACPI @@ -1511,7 +1510,6 @@ asmlinkage __visible void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; - u64 pat; int rc; if (!xen_start_info) @@ -1618,13 +1616,6 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_start_info->nr_pages); xen_reserve_special_pages(); - /* - * Modify the cache mode translation tables to match Xen's PAT - * configuration. - */ - rdmsrl(MSR_IA32_CR_PAT, pat); - __init_cache_modes(pat); - /* keep using Xen gdt for now; no urgent need to change it */ #ifdef CONFIG_X86_32 -- cgit v1.2.3 From b6350c21cfe8aa9d65e189509a23c0ea4b8362c2 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Mar 2016 15:42:03 -0600 Subject: x86/pat: Document the PAT initialization sequence Update PAT documentation to describe how PAT is initialized under various configurations. Signed-off-by: Toshi Kani Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Toshi Kani Cc: elliott@hpe.com Cc: konrad.wilk@oracle.com Cc: paul.gortmaker@windriver.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1458769323-24491-8-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar --- Documentation/x86/pat.txt | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt index 54944c71b819..2a4ee6302122 100644 --- a/Documentation/x86/pat.txt +++ b/Documentation/x86/pat.txt @@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with "debugpat" boot parameter. With this parameter, various debug messages are printed to dmesg log. +PAT Initialization +------------------ + +The following table describes how PAT is initialized under various +configurations. The PAT MSR must be updated by Linux in order to support WC +and WT attributes. 
Otherwise, the PAT MSR has the value programmed in it +by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests. + + MTRR PAT Call Sequence PAT State PAT MSR + ========================================================= + E E MTRR -> PAT init Enabled OS + E D MTRR -> PAT init Disabled - + D E MTRR -> PAT disable Disabled BIOS + D D MTRR -> PAT disable Disabled - + - np/E PAT -> PAT disable Disabled BIOS + - np/D PAT -> PAT disable Disabled - + E !P/E MTRR -> PAT init Disabled BIOS + D !P/E MTRR -> PAT disable Disabled BIOS + !M !P/E MTRR stub -> PAT disable Disabled BIOS + + Legend + ------------------------------------------------ + E Feature enabled in CPU + D Feature disabled/unsupported in CPU + np "nopat" boot option specified + !P CONFIG_X86_PAT option unset + !M CONFIG_MTRR option unset + Enabled PAT state set to enabled + Disabled PAT state set to disabled + OS PAT initializes PAT MSR with OS setting + BIOS PAT keeps PAT MSR with BIOS setting + -- cgit v1.2.3 From b3edfda4382ffaef5e5c1cffb25a33b3b9ef4546 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 16 Mar 2016 13:19:29 +0100 Subject: x86/cpu: Do the feature test first in enable_sep_cpu() ... before assigning local vars. Kill out label too and simplify. No functionality change. Signed-off-by: Borislav Petkov Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458130769-24963-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8394b3d1f94f..7fea4079d102 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1076,12 +1076,12 @@ void enable_sep_cpu(void) struct tss_struct *tss; int cpu; + if (!boot_cpu_has(X86_FEATURE_SEP)) + return; + cpu = get_cpu(); tss = &per_cpu(cpu_tss, cpu); - if (!boot_cpu_has(X86_FEATURE_SEP)) - goto out; - /* * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- * see the big comment in struct x86_hw_tss's definition. 
@@ -1096,7 +1096,6 @@ void enable_sep_cpu(void) wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); -out: put_cpu(); } #endif -- cgit v1.2.3 From 568a58e5dfbcb88011cad7f87ed046aa00f19d1a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:42:01 +0200 Subject: x86/mm/pat, x86/cpufeature: Remove cpu_has_pat Signed-off-by: Borislav Petkov Acked-by: Daniel Vetter Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: intel-gfx@lists.freedesktop.org Link: http://lkml.kernel.org/r/1459266123-21878-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3636ec06c887..7a3fa7d70bd7 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -132,7 +132,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) -#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3d31d3ac589e..aaec8aef9fd4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1732,7 +1732,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, if (args->flags & ~(I915_MMAP_WC)) return -EINVAL; - if (args->flags & I915_MMAP_WC && !cpu_has_pat) + if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) return -ENODEV; obj = drm_gem_object_lookup(dev, file, args->handle); -- cgit v1.2.3 From 7b5e74e637e4a977c7cf40fd7de332f60b68180e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:41:54 +0200 Subject: x86/cpufeature: Remove cpu_has_arch_perfmon Use boot_cpu_has() instead. 
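For illustration only (a hypothetical init-path sketch, not code from the tree), the conversion pattern applied throughout this series looks like:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <asm/cpufeature.h>

    static int __init example_init(void)
    {
            /* old style: if (!cpu_has_arch_perfmon) ... */
            if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
                    return -ENODEV; /* init/slow path: plain capability test */

            return 0;
    }

On hot paths the equivalent check is static_cpu_has(X86_FEATURE_ARCH_PERFMON), which is patched into the code at boot time via alternatives, so neither case needs a per-feature cpu_has_* convenience macro.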
Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: oprofile-list@lists.sf.net Link: http://lkml.kernel.org/r/1459266123-21878-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/oprofile/nmi_int.c | 4 ++-- arch/x86/oprofile/op_model_ppro.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3636ec06c887..fee7a6efcd2d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -131,7 +131,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) -#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 0e07e0968c3a..25171e9595f7 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -636,7 +636,7 @@ static int __init ppro_init(char **cpu_type) __u8 cpu_model = boot_cpu_data.x86_model; struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ - if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) + if (force_cpu_type == arch_perfmon && boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) return 0; /* @@ -761,7 +761,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (cpu_type) break; - if (!cpu_has_arch_perfmon) + if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) return -ENODEV; /* use arch perfmon as fallback */ diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index d90528ea5412..350f7096baac 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -75,7 +75,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, u64 val; int i; - if (cpu_has_arch_perfmon) { + if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) { union cpuid10_eax eax; eax.full = cpuid_eax(0xa); -- cgit v1.2.3 From 0c9f3536cc712dfd5ec3127d55cd7b807cc0adb5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:41:55 +0200 Subject: x86/cpufeature: Remove cpu_has_hypervisor Use boot_cpu_has() instead. 
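As background (an illustrative sketch, not part of this patch): X86_FEATURE_HYPERVISOR reflects CPUID.01H:ECX bit 31, which a hypervisor sets to advertise its presence; by convention such hypervisors also expose a vendor-id leaf at 0x40000000. A hypothetical init-path check would look roughly like:

    #include <linux/types.h>
    #include <linux/init.h>
    #include <asm/cpufeature.h>
    #include <asm/processor.h>

    static bool __init hypervisor_vendor_present(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
                    return false;

            /* 0x40000000 is the conventional hypervisor vendor-id leaf. */
            cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
            return ebx || ecx || edx;
    }

The vmware, kvm and visorchipset hunks below follow the same feature-bit-first pattern.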
Tested-by: David Kershner Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: sparmaintainer@unisys.com Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1459266123-21878-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 2 +- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kernel/cpu/vmware.c | 2 +- arch/x86/kernel/kvm.c | 2 +- drivers/staging/unisys/visorbus/visorchipset.c | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 7012d18bb293..3f6d8b5672d5 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1383,7 +1383,7 @@ static int __init intel_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; - if (cpu_has_hypervisor) + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return -ENODEV; max_packages = topology_max_packages(); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index fee7a6efcd2d..3aea54ecabfd 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -136,7 +136,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) -#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) /* * Do not add any more of those clumsy macros - use static_cpu_has() for * fast paths and boot_cpu_has() otherwise! diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 364e58346897..8cac429b6a1d 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -94,7 +94,7 @@ static void __init vmware_platform_setup(void) */ static uint32_t __init vmware_platform(void) { - if (cpu_has_hypervisor) { + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { unsigned int eax; unsigned int hyper_vendor_id[3]; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 807950860fb7..dc1207e2f193 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -522,7 +522,7 @@ static noinline uint32_t __kvm_cpuid_base(void) if (boot_cpu_data.cpuid_level < 0) return 0; /* So we don't blow up on old processors */ - if (cpu_has_hypervisor) + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); return 0; diff --git a/drivers/staging/unisys/visorbus/visorchipset.c b/drivers/staging/unisys/visorbus/visorchipset.c index 5fbda7b218c7..9cf4f8463c4e 100644 --- a/drivers/staging/unisys/visorbus/visorchipset.c +++ b/drivers/staging/unisys/visorbus/visorchipset.c @@ -2425,7 +2425,7 @@ static __init uint32_t visorutil_spar_detect(void) { unsigned int eax, ebx, ecx, edx; - if (cpu_has_hypervisor) { + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { /* check the ID */ cpuid(UNISYS_SPAR_LEAF_ID, &eax, &ebx, &ecx, &edx); return (ebx == UNISYS_SPAR_ID_EBX) && -- cgit v1.2.3 From ab4a56fa2c6ce9384ca077b6570c56fe18361f17 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:41:56 +0200 Subject: x86/cpufeature: Remove cpu_has_osxsave Use boot_cpu_has() instead. 
Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-crypto@vger.kernel.org Link: http://lkml.kernel.org/r/1459266123-21878-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/crypto/camellia_aesni_avx2_glue.c | 3 ++- arch/x86/crypto/camellia_aesni_avx_glue.c | 2 +- arch/x86/crypto/serpent_avx2_glue.c | 2 +- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/include/asm/xor_avx.h | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index d84456924563..c37f7028c85a 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -562,7 +562,8 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { + if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX2 or AES-NI instructions are not detected.\n"); return -ENODEV; } diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 93d8f295784e..65f64556725b 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -554,7 +554,7 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { + if (!cpu_has_avx || !cpu_has_aes || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX or AES-NI instructions are not detected.\n"); return -ENODEV; } diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 6d198342e2de..408cae2b3543 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -538,7 +538,7 @@ static int __init init(void) { const char *feature_name; - if (!cpu_has_avx2 || !cpu_has_osxsave) { + if (!cpu_has_avx2 || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX2 instructions are not detected.\n"); return -ENODEV; } diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3aea54ecabfd..33c29aabc9aa 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -135,7 +135,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) -#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) /* * Do not add any more of those clumsy macros - use static_cpu_has() for * fast paths and boot_cpu_has() otherwise! diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 7c0a517ec751..e45e556140af 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -167,12 +167,12 @@ static struct xor_block_template xor_block_avx = { #define AVX_XOR_SPEED \ do { \ - if (cpu_has_avx && cpu_has_osxsave) \ + if (cpu_has_avx && boot_cpu_has(X86_FEATURE_OSXSAVE)) \ xor_speed(&xor_block_avx); \ } while (0) #define AVX_SELECT(FASTEST) \ - (cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST) + (cpu_has_avx && boot_cpu_has(X86_FEATURE_OSXSAVE) ? 
&xor_block_avx : FASTEST) #else -- cgit v1.2.3 From 62436a4d36c94d202784cd8a997ff8bb4b880237 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:41:57 +0200 Subject: x86/cpufeature: Remove cpu_has_x2apic Signed-off-by: Borislav Petkov Acked-by: Tony Luck Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459266123-21878-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/iommu.h | 1 - arch/x86/include/asm/apic.h | 4 ++-- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kernel/apic/apic.c | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h index 105c93b00b1b..1d1212901ae7 100644 --- a/arch/ia64/include/asm/iommu.h +++ b/arch/ia64/include/asm/iommu.h @@ -1,7 +1,6 @@ #ifndef _ASM_IA64_IOMMU_H #define _ASM_IA64_IOMMU_H 1 -#define cpu_has_x2apic 0 /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 98f25bbafac4..bc27611fa58f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -239,10 +239,10 @@ extern void __init check_x2apic(void); extern void x2apic_setup(void); static inline int x2apic_enabled(void) { - return cpu_has_x2apic && apic_is_x2apic_enabled(); + return boot_cpu_has(X86_FEATURE_X2APIC) && apic_is_x2apic_enabled(); } -#define x2apic_supported() (cpu_has_x2apic) +#define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC)) #else /* !CONFIG_X86_X2APIC */ static inline void check_x2apic(void) { } static inline void x2apic_setup(void) { } diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 33c29aabc9aa..3da7aec9fca1 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -132,7 +132,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) -#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) /* diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d356987a04e9..d7867c885bf8 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1561,7 +1561,7 @@ void __init check_x2apic(void) pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); x2apic_mode = 1; x2apic_state = X2APIC_ON; - } else if (!cpu_has_x2apic) { + } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) { x2apic_state = X2APIC_DISABLED; } } -- cgit v1.2.3 From b8291adc191abec2095f03a130ac91506d345cae Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:41:58 +0200 Subject: x86/cpufeature: Remove cpu_has_gbpages Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459266123-21878-6-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kvm/mmu.c | 3 ++- arch/x86/mm/hugetlbpage.c | 4 ++-- arch/x86/mm/init.c | 2 +- arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/pageattr.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3da7aec9fca1..693b4aa43908 100644 --- a/arch/x86/include/asm/cpufeature.h +++ 
b/arch/x86/include/asm/cpufeature.h @@ -130,7 +130,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) -#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 70e95d097ef1..bc1e0b65909e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3836,7 +3836,8 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, false, - cpu_has_gbpages, true, true); + boot_cpu_has(X86_FEATURE_GBPAGES), + true, true); else __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, boot_cpu_data.x86_phys_bits, diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 740d7ac03a55..14a95054d4e0 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -162,7 +162,7 @@ static __init int setup_hugepagesz(char *opt) unsigned long ps = memparse(opt, &opt); if (ps == PMD_SIZE) { hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (ps == PUD_SIZE && cpu_has_gbpages) { + } else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); } else { printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", @@ -177,7 +177,7 @@ __setup("hugepagesz=", setup_hugepagesz); static __init int gigantic_pages_init(void) { /* With compaction or CMA we can allocate gigantic pages at runtime */ - if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT)) + if (boot_cpu_has(X86_FEATURE_GBPAGES) && !size_to_hstate(1UL << PUD_SHIFT)) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); return 0; } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 9d56f271d519..14377e98f279 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -173,7 +173,7 @@ static void __init probe_page_size_mask(void) __supported_pte_mask &= ~_PAGE_GLOBAL; /* Enable 1 GB linear kernel mappings if available: */ - if (direct_gbpages && cpu_has_gbpages) { + if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { printk(KERN_INFO "Using GB pages for direct mapping\n"); page_size_mask |= 1 << PG_LEVEL_1G; } else { diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0d8d53d1f5cc..5a116ace9cbb 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -378,7 +378,7 @@ EXPORT_SYMBOL(iounmap); int __init arch_ioremap_pud_supported(void) { #ifdef CONFIG_X86_64 - return cpu_has_gbpages; + return boot_cpu_has(X86_FEATURE_GBPAGES); #else return 0; #endif diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 01be9ec3bf79..fb20c2ee0092 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1055,7 +1055,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* * Map everything starting from the Gb boundary, possibly with 1G pages */ - while (cpu_has_gbpages && end - start >= PUD_SIZE) { + while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) { set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pud_pgprot))); -- cgit v1.2.3 From 906bf7fda2c9cf5c1762ec607943ed54b6c5b203 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:41:59 +0200 Subject: x86/cpufeature: Remove 
cpu_has_clflush Use the fast variant in the DRM code. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Cc: intel-gfx@lists.freedesktop.org Link: http://lkml.kernel.org/r/1459266123-21878-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/tce_64.c | 2 +- arch/x86/mm/pageattr.c | 2 +- drivers/gpu/drm/drm_cache.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 693b4aa43908..a75154232db5 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -129,7 +129,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) -#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1f7fdb91a818..628a9f853b84 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -468,7 +468,7 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_PEBS); } - if (c->x86 == 6 && cpu_has_clflush && + if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) && (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c index ab40954e113e..f386bad0984e 100644 --- a/arch/x86/kernel/tce_64.c +++ b/arch/x86/kernel/tce_64.c @@ -40,7 +40,7 @@ static inline void flush_tce(void* tceaddr) { /* a single tce can't cross a cache line */ - if (cpu_has_clflush) + if (boot_cpu_has(X86_FEATURE_CLFLUSH)) clflush(tceaddr); else wbinvd(); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fb20c2ee0092..bbf462ff9745 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1460,7 +1460,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, * error case we fall back to cpa_flush_all (which uses * WBINVD): */ - if (!ret && cpu_has_clflush) { + if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) { if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { cpa_flush_array(addr, numpages, cache, cpa.flags, pages); diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index 6743ff7dccfa..059f7c39c582 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -72,7 +72,7 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages) { #if defined(CONFIG_X86) - if (cpu_has_clflush) { + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { drm_cache_flush_clflush(pages, num_pages); return; } @@ -105,7 +105,7 @@ void drm_clflush_sg(struct sg_table *st) { #if defined(CONFIG_X86) - if (cpu_has_clflush) { + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { struct sg_page_iter sg_iter; mb(); @@ -129,7 +129,7 @@ void drm_clflush_virt_range(void *addr, unsigned long length) { #if defined(CONFIG_X86) - if (cpu_has_clflush) { + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { const int size = boot_cpu_data.x86_clflush_size; void *end = addr + length; addr = (void *)(((unsigned long)addr) & -size); diff --git 
a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1328bc5021b4..b845f468dd74 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -488,7 +488,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, ret = relocate_entry_cpu(obj, reloc, target_offset); else if (obj->map_and_fenceable) ret = relocate_entry_gtt(obj, reloc, target_offset); - else if (cpu_has_clflush) + else if (static_cpu_has(X86_FEATURE_CLFLUSH)) ret = relocate_entry_clflush(obj, reloc, target_offset); else { WARN_ONCE(1, "Impossible case in relocation handling\n"); -- cgit v1.2.3 From 054efb6467f84490bdf92afab6d9dbd5102e620a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:42:00 +0200 Subject: x86/cpufeature: Remove cpu_has_xmm2 Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-crypto@vger.kernel.org Link: http://lkml.kernel.org/r/1459266123-21878-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/crypto/poly1305_glue.c | 2 +- arch/x86/crypto/serpent_sse2_glue.c | 2 +- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/lib/usercopy_32.c | 4 ++-- 6 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 4264a3d59589..b283868acdf8 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -179,7 +179,7 @@ static struct shash_alg alg = { static int __init poly1305_simd_mod_init(void) { - if (!cpu_has_xmm2) + if (!boot_cpu_has(X86_FEATURE_XMM2)) return -ENODEV; #ifdef CONFIG_AS_AVX2 diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 8943407e8917..644f97ab8cac 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -600,7 +600,7 @@ static struct crypto_alg serpent_algs[10] = { { static int __init serpent_sse2_init(void) { - if (!cpu_has_xmm2) { + if (!boot_cpu_has(X86_FEATURE_XMM2)) { printk(KERN_INFO "SSE2 instructions are not detected.\n"); return -ENODEV; } diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a75154232db5..5e02bc2e8444 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -125,7 +125,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) -#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) #define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6e47e3a916f1..ea8f88a2a688 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -750,7 +750,7 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 0xf) set_cpu_cap(c, X86_FEATURE_K8); - if (cpu_has_xmm2) { + if (cpu_has(c, X86_FEATURE_XMM2)) { /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 628a9f853b84..1dba36fe73e5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -456,7 +456,7 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, 
X86_FEATURE_ARCH_PERFMON); } - if (cpu_has_xmm2) + if (cpu_has(c, X86_FEATURE_XMM2)) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (boot_cpu_has(X86_FEATURE_DS)) { diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 91d93b95bd86..b559d9238781 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -612,7 +612,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, { stac(); #ifdef CONFIG_X86_INTEL_USERCOPY - if (n > 64 && cpu_has_xmm2) + if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_zeroing_intel_nocache(to, from, n); else __copy_user_zeroing(to, from, n); @@ -629,7 +629,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr { stac(); #ifdef CONFIG_X86_INTEL_USERCOPY - if (n > 64 && cpu_has_xmm2) + if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); else __copy_user(to, from, n); -- cgit v1.2.3 From c109bf95992b391bb40bc37c5d309d13fead99b5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:42:02 +0200 Subject: x86/cpufeature: Remove cpu_has_pge Use static_cpu_has() in __flush_tlb_all() due to the time-sensitivity of this one. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459266123-21878-10-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/include/asm/tlbflush.h | 2 +- arch/x86/kernel/cpu/intel.c | 6 +++--- arch/x86/kernel/cpu/mtrr/cyrix.c | 4 ++-- arch/x86/kernel/cpu/mtrr/generic.c | 4 ++-- arch/x86/mm/init.c | 2 +- arch/x86/xen/enlighten.c | 2 +- drivers/lguest/x86/core.c | 2 +- 8 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 5e02bc2e8444..f97b53417d44 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -121,7 +121,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) #define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) -#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index c24b4224d439..3628e6c5ebf4 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -181,7 +181,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (cpu_has_pge) + if (static_cpu_has(X86_FEATURE_PGE)) __flush_tlb_global(); else __flush_tlb(); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1dba36fe73e5..f71a34944b56 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -152,9 +152,9 @@ static void early_init_intel(struct cpuinfo_x86 *c) * the TLB when any changes are made to any of the page table entries. * The operating system must reload CR3 to cause the TLB to be flushed" * - * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should - * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE - * to be modified + * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h + * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE + * to be modified. 
*/ if (c->x86 == 5 && c->x86_model == 9) { pr_info("Disabling PGE capability bit\n"); diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index f8c81ba0b465..b1086f79e57e 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -137,7 +137,7 @@ static void prepare_set(void) u32 cr0; /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if (cpu_has_pge) { + if (boot_cpu_has(X86_FEATURE_PGE)) { cr4 = __read_cr4(); __write_cr4(cr4 & ~X86_CR4_PGE); } @@ -170,7 +170,7 @@ static void post_set(void) write_cr0(read_cr0() & ~X86_CR0_CD); /* Restore value of CR4 */ - if (cpu_has_pge) + if (boot_cpu_has(X86_FEATURE_PGE)) __write_cr4(cr4); } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 19f57360dfd2..f1bed301bdb2 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -741,7 +741,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) wbinvd(); /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if (cpu_has_pge) { + if (boot_cpu_has(X86_FEATURE_PGE)) { cr4 = __read_cr4(); __write_cr4(cr4 & ~X86_CR4_PGE); } @@ -771,7 +771,7 @@ static void post_set(void) __releases(set_atomicity_lock) write_cr0(read_cr0() & ~X86_CR0_CD); /* Restore value of CR4 */ - if (cpu_has_pge) + if (boot_cpu_has(X86_FEATURE_PGE)) __write_cr4(cr4); raw_spin_unlock(&set_atomicity_lock); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 14377e98f279..05ff46a9c261 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -166,7 +166,7 @@ static void __init probe_page_size_mask(void) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ - if (cpu_has_pge) { + if (boot_cpu_has(X86_FEATURE_PGE)) { cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } else diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 880862c7d9dd..055f48ddb03c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1472,7 +1472,7 @@ static void xen_pvh_set_cr_flags(int cpu) if (cpu_has_pse) cr4_set_bits_and_update_boot(X86_CR4_PSE); - if (cpu_has_pge) + if (boot_cpu_has(X86_FEATURE_PGE)) cr4_set_bits_and_update_boot(X86_CR4_PGE); } diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 6a4cd771a2be..65f22debf3c6 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -599,7 +599,7 @@ void __init lguest_arch_host_init(void) * doing this. */ get_online_cpus(); - if (cpu_has_pge) { /* We have a broader idea of "global". */ + if (boot_cpu_has(X86_FEATURE_PGE)) { /* We have a broader idea of "global". */ /* Remember that this was originally set (for cleanup). 
*/ cpu_had_pge = 1; /* -- cgit v1.2.3 From 16bf92261b1b6cb1a1c0671b445a2fcb5a1ecc96 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 29 Mar 2016 17:42:03 +0200 Subject: x86/cpufeature: Remove cpu_has_pse Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459266123-21878-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/include/asm/pgtable.h | 2 +- arch/x86/mm/init.c | 4 ++-- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- arch/x86/mm/ioremap.c | 2 +- arch/x86/power/hibernate_32.c | 2 +- arch/x86/xen/enlighten.c | 2 +- 8 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f97b53417d44..97e5f13ea471 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -119,7 +119,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 97f3242e133c..f86491a7bc9d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -183,7 +183,7 @@ static inline int pmd_trans_huge(pmd_t pmd) static inline int has_transparent_hugepage(void) { - return cpu_has_pse; + return boot_cpu_has(X86_FEATURE_PSE); } #ifdef __HAVE_ARCH_PTE_DEVMAP diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 05ff46a9c261..372aad2b3291 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -157,12 +157,12 @@ static void __init probe_page_size_mask(void) * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. 
*/ - if (cpu_has_pse && !debug_pagealloc_enabled()) + if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled()) page_size_mask |= 1 << PG_LEVEL_2M; #endif /* Enable PSE if available */ - if (cpu_has_pse) + if (boot_cpu_has(X86_FEATURE_PSE)) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bd7a9b9e2e14..85af914e3d27 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -284,7 +284,7 @@ kernel_physical_mapping_init(unsigned long start, */ mapping_iter = 1; - if (!cpu_has_pse) + if (!boot_cpu_has(X86_FEATURE_PSE)) use_pse = 0; repeat: diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 214afda97911..89d97477c1d9 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1295,7 +1295,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) struct vmem_altmap *altmap = to_vmem_altmap(start); int err; - if (cpu_has_pse) + if (boot_cpu_has(X86_FEATURE_PSE)) err = vmemmap_populate_hugepages(start, end, node, altmap); else if (altmap) { pr_err_once("%s: no cpu support for altmap allocations\n", @@ -1338,7 +1338,7 @@ void register_page_bootmem_memmap(unsigned long section_nr, } get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); - if (!cpu_has_pse) { + if (!boot_cpu_has(X86_FEATURE_PSE)) { next = (addr + PAGE_SIZE) & PAGE_MASK; pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 5a116ace9cbb..f0894910bdd7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -386,7 +386,7 @@ int __init arch_ioremap_pud_supported(void) int __init arch_ioremap_pmd_supported(void) { - return cpu_has_pse; + return boot_cpu_has(X86_FEATURE_PSE); } /* diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 291226b952a9..9f14bd34581d 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -106,7 +106,7 @@ static int resume_physical_mapping_init(pgd_t *pgd_base) * normal page tables. * NOTE: We can mark everything as executable here */ - if (cpu_has_pse) { + if (boot_cpu_has(X86_FEATURE_PSE)) { set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); pfn += PTRS_PER_PTE; } else { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 055f48ddb03c..ff2a2e6ef7af 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1469,7 +1469,7 @@ static void xen_pvh_set_cr_flags(int cpu) * For BSP, PSE PGE are set in probe_page_size_mask(), for APs * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu(). */ - if (cpu_has_pse) + if (boot_cpu_has(X86_FEATURE_PSE)) cr4_set_bits_and_update_boot(X86_CR4_PSE); if (boot_cpu_has(X86_FEATURE_PGE)) -- cgit v1.2.3 From d7847a7017b2a2759dd5590c0cffdbdf2994918e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 1 Apr 2016 09:00:35 +0200 Subject: x86/cpufeature: Fix build bug caused by merge artifact with the removal of cpu_has_hypervisor The 0-day build robot by Fengguang Wu reported a build failure: arch/x86/events//intel/cstate.c: In function 'cstate_pmu_init': arch/x86/events//intel/cstate.c:680:6: error: 'cpu_has_hypervisor' undeclared (first use in this function) ... which was caused by a merge mistake I made when applying the following patch: 0c9f3536cc71 ("x86/cpufeature: Remove cpu_has_hypervisor") apply the missing hunk as well. 
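For reference, the cpu_has_hypervisor macro removed by 0c9f3536cc71 was (like the other cpu_has_* wrappers dropped throughout this series) just a thin alias along the lines of:

	#define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)

so the missing hunk below simply open-codes the boot_cpu_has() test at the one remaining call site in cstate.c.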
Reported-by: kbuild test robot Cc: David Kershner Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: sparmaintainer@unisys.com Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1459266123-21878-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 7946c4231169..d5045c8e2e63 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -677,7 +677,7 @@ static int __init cstate_pmu_init(void) { int err; - if (cpu_has_hypervisor) + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return -ENODEV; err = cstate_init(); -- cgit v1.2.3 From 0051202f6ad5fd9c04d220343e66d1eb890f7b81 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 7 Apr 2016 17:31:44 -0700 Subject: selftests/x86: Test the FSBASE/GSBASE API and context switching This catches two distinct bugs in the current code. I'll fix them. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rudolf Marek Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/7e5941148d1e2199f070dadcdf7355959f5f8e85.1460075211.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 1 + tools/testing/selftests/x86/fsgsbase.c | 398 +++++++++++++++++++++++++++++++++ 2 files changed, 399 insertions(+) create mode 100644 tools/testing/selftests/x86/fsgsbase.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index b47ebd170690..c73425de3cfe 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -9,6 +9,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer +TARGETS_C_64BIT_ONLY := fsgsbase TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c new file mode 100644 index 000000000000..5b2b4b3c634c --- /dev/null +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -0,0 +1,398 @@ +/* + * fsgsbase.c, an fsgsbase test + * Copyright (c) 2014-2016 Andy Lutomirski + * GPL v2 + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef __x86_64__ +# error This test is 64-bit only +#endif + +static volatile sig_atomic_t want_segv; +static volatile unsigned long segv_addr; + +static int nerrs; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = 
(ucontext_t*)ctx_void; + + if (!want_segv) { + clearhandler(SIGSEGV); + return; /* Crash cleanly. */ + } + + want_segv = false; + segv_addr = (unsigned long)si->si_addr; + + ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */ + +} + +enum which_base { FS, GS }; + +static unsigned long read_base(enum which_base which) +{ + unsigned long offset; + /* + * Unless we have FSGSBASE, there's no direct way to do this from + * user mode. We can get at it indirectly using signals, though. + */ + + want_segv = true; + + offset = 0; + if (which == FS) { + /* Use a constant-length instruction here. */ + asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax"); + } else { + asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax"); + } + if (!want_segv) + return segv_addr + offset; + + /* + * If that didn't segfault, try the other end of the address space. + * Unless we get really unlucky and run into the vsyscall page, this + * is guaranteed to segfault. + */ + + offset = (ULONG_MAX >> 1) + 1; + if (which == FS) { + asm volatile ("mov %%fs:(%%rcx), %%rax" + : : "c" (offset) : "rax"); + } else { + asm volatile ("mov %%gs:(%%rcx), %%rax" + : : "c" (offset) : "rax"); + } + if (!want_segv) + return segv_addr + offset; + + abort(); +} + +static void check_gs_value(unsigned long value) +{ + unsigned long base; + unsigned short sel; + + printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0) + err(1, "ARCH_SET_GS"); + + asm volatile ("mov %%gs, %0" : "=rm" (sel)); + base = read_base(GS); + if (base == value) { + printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n", + sel); + } else { + nerrs++; + printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n", + base, sel); + } + + if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0) + err(1, "ARCH_GET_GS"); + if (base == value) { + printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n", + sel); + } else { + nerrs++; + printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n", + base, sel); + } +} + +static void mov_0_gs(unsigned long initial_base, bool schedule) +{ + unsigned long base, arch_base; + + printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : ""); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0) + err(1, "ARCH_SET_GS"); + + if (schedule) + usleep(10); + + asm volatile ("mov %0, %%gs" : : "rm" (0)); + base = read_base(GS); + if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0) + err(1, "ARCH_GET_GS"); + if (base == arch_base) { + printf("[OK]\tGSBASE is 0x%lx\n", base); + } else { + nerrs++; + printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base); + } +} + +static volatile unsigned long remote_base; +static volatile bool remote_hard_zero; +static volatile unsigned int ftx; + +/* + * ARCH_SET_FS/GS(0) may or may not program a selector of zero. HARD_ZERO + * means to force the selector to zero to improve test coverage. 
+ */ +#define HARD_ZERO 0xa1fa5f343cb85fa4 + +static void do_remote_base() +{ + unsigned long to_set = remote_base; + bool hard_zero = false; + if (to_set == HARD_ZERO) { + to_set = 0; + hard_zero = true; + } + + if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0) + err(1, "ARCH_SET_GS"); + + if (hard_zero) + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); + + unsigned short sel; + asm volatile ("mov %%gs, %0" : "=rm" (sel)); + printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n", + to_set, hard_zero ? " and clear gs" : "", sel); +} + +void do_unexpected_base(void) +{ + /* + * The goal here is to try to arrange for GS == 0, GSBASE != + * 0, and for the the kernel the think that GSBASE == 0. + * + * To make the test as reliable as possible, this uses + * explicit descriptorss. (This is not the only way. This + * could use ARCH_SET_GS with a low, nonzero base, but the + * relevant side effect of ARCH_SET_GS could change.) + */ + + /* Step 1: tell the kernel that we have GSBASE == 0. */ + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0) + err(1, "ARCH_SET_GS"); + + /* Step 2: change GSBASE without telling the kernel. */ + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0xBAADF00D, + .limit = 0xfffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 1, + .seg_not_present = 0, + .useable = 0 + }; + if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) { + printf("\tother thread: using LDT slot 0\n"); + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7)); + } else { + /* No modify_ldt for us (configured out, perhaps) */ + + struct user_desc *low_desc = mmap( + NULL, sizeof(desc), + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); + memcpy(low_desc, &desc, sizeof(desc)); + + low_desc->entry_number = -1; + + /* 32-bit set_thread_area */ + long ret; + asm volatile ("int $0x80" + : "=a" (ret) : "a" (243), "b" (low_desc) + : "flags"); + memcpy(&desc, low_desc, sizeof(desc)); + munmap(low_desc, sizeof(desc)); + + if (ret != 0) { + printf("[NOTE]\tcould not create a segment -- test won't do anything\n"); + return; + } + printf("\tother thread: using GDT slot %d\n", desc.entry_number); + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3))); + } + + /* + * Step 3: set the selector back to zero. On AMD chips, this will + * preserve GSBASE. + */ + + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); +} + +static void *threadproc(void *ctx) +{ + while (1) { + while (ftx == 0) + syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0); + if (ftx == 3) + return NULL; + + if (ftx == 1) + do_remote_base(); + else if (ftx == 2) + do_unexpected_base(); + else + errx(1, "helper thread got bad command"); + + ftx = 0; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + } +} + +static void set_gs_and_switch_to(unsigned long local, unsigned long remote) +{ + unsigned long base; + + bool hard_zero = false; + if (local == HARD_ZERO) { + hard_zero = true; + local = 0; + } + + printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n", + local, hard_zero ? 
" and clear gs" : "", remote); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0) + err(1, "ARCH_SET_GS"); + if (hard_zero) + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); + + if (read_base(GS) != local) { + nerrs++; + printf("[FAIL]\tGSBASE wasn't set as expected\n"); + } + + remote_base = remote; + ftx = 1; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + while (ftx != 0) + syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0); + + base = read_base(GS); + if (base == local) { + printf("[OK]\tGSBASE remained 0x%lx\n", local); + } else { + nerrs++; + printf("[FAIL]\tGSBASE changed to 0x%lx\n", base); + } +} + +static void test_unexpected_base(void) +{ + unsigned long base; + + printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n"); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0) + err(1, "ARCH_SET_GS"); + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); + + ftx = 2; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + while (ftx != 0) + syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0); + + base = read_base(GS); + if (base == 0) { + printf("[OK]\tGSBASE remained 0\n"); + } else { + nerrs++; + printf("[FAIL]\tGSBASE changed to 0x%lx\n", base); + } +} + +int main() +{ + pthread_t thread; + + sethandler(SIGSEGV, sigsegv, 0); + + check_gs_value(0); + check_gs_value(1); + check_gs_value(0x200000000); + check_gs_value(0); + check_gs_value(0x200000000); + check_gs_value(1); + + for (int sched = 0; sched < 2; sched++) { + mov_0_gs(0, !!sched); + mov_0_gs(1, !!sched); + mov_0_gs(0x200000000, !!sched); + } + + /* Set up for multithreading. */ + + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); /* should never fail */ + + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + + static unsigned long bases_with_hard_zero[] = { + 0, HARD_ZERO, 1, 0x200000000, + }; + + for (int local = 0; local < 4; local++) { + for (int remote = 0; remote < 4; remote++) { + set_gs_and_switch_to(bases_with_hard_zero[local], + bases_with_hard_zero[remote]); + } + } + + test_unexpected_base(); + + ftx = 3; /* Kill the thread. */ + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + if (pthread_join(thread, NULL) != 0) + err(1, "pthread_join"); + + return nerrs == 0 ? 0 : 1; +} -- cgit v1.2.3 From d47b50e7a111bb7a56fb1c974728b56209d7f515 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 7 Apr 2016 17:31:45 -0700 Subject: x86/arch_prctl: Fix ARCH_GET_FS and ARCH_GET_GS ARCH_GET_FS and ARCH_GET_GS attempted to figure out the fsbase and gsbase respectively from saved thread state. This was wrong: fsbase and gsbase live in registers while a thread is running, not in memory. For reasons I can't fathom, the fsbase and gsbase code were different. Since neither was correct, I didn't try to figure out what the point of the difference was. Change it to simply read the MSRs. The code for reading the base for a remote thread is also completely wrong if the target thread uses its own descriptors (which is the case for all 32-bit threaded programs), but fixing that is a different story. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rudolf Marek Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c6e7b507c72ca3bdbf6c7a8a3ceaa0334e873bd9.1460075211.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6cbab31ac23a..c671b9b015c0 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -566,10 +566,10 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) break; case ARCH_GET_FS: { unsigned long base; - if (task->thread.fsindex == FS_TLS_SEL) - base = read_32bit_tls(task, FS_TLS); - else if (doit) + if (doit) rdmsrl(MSR_FS_BASE, base); + else if (task->thread.fsindex == FS_TLS_SEL) + base = read_32bit_tls(task, FS_TLS); else base = task->thread.fs; ret = put_user(base, (unsigned long __user *)addr); @@ -577,16 +577,11 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) } case ARCH_GET_GS: { unsigned long base; - unsigned gsindex; - if (task->thread.gsindex == GS_TLS_SEL) + if (doit) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else if (task->thread.gsindex == GS_TLS_SEL) base = read_32bit_tls(task, GS_TLS); - else if (doit) { - savesegment(gs, gsindex); - if (gsindex) - rdmsrl(MSR_KERNEL_GS_BASE, base); - else - base = task->thread.gs; - } else + else base = task->thread.gs; ret = put_user(base, (unsigned long __user *)addr); break; -- cgit v1.2.3 From 7a5d67048745e3eab62779c6d043a2e3d95dc848 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 7 Apr 2016 17:31:46 -0700 Subject: x86/cpu: Probe the behavior of nulling out a segment at boot time AMD and Intel do different things when writing zero to a segment selector. Since neither vendor documents the behavior well and it's easy to test the behavior, try nulling fs to see what happens. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rudolf Marek Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/61588ba0e0df35beafd363dc8b68a4c5878ef095.1460075211.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8f9afefd2dc5..2a052302bc43 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -294,6 +294,7 @@ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ #ifdef CONFIG_X86_32 /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7fea4079d102..8e40eee5843a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -889,6 +889,35 @@ static void detect_nopl(struct cpuinfo_x86 *c) #endif } +static void detect_null_seg_behavior(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_64 + /* + * Empirically, writing zero to a segment selector on AMD does + * not clear the base, whereas writing zero to a segment + * selector on Intel does clear the base. 
Intel's behavior + * allows slightly faster context switches in the common case + * where GS is unused by the prev and next threads. + * + * Since neither vendor documents this anywhere that I can see, + * detect it directly instead of hardcoding the choice by + * vendor. + * + * I've designated AMD's behavior as the "bug" because it's + * counterintuitive and less friendly. + */ + + unsigned long old_base, tmp; + rdmsrl(MSR_FS_BASE, old_base); + wrmsrl(MSR_FS_BASE, 1); + loadsegment(fs, 0); + rdmsrl(MSR_FS_BASE, tmp); + if (tmp != 0) + set_cpu_bug(c, X86_BUG_NULL_SEG); + wrmsrl(MSR_FS_BASE, old_base); +#endif +} + static void generic_identify(struct cpuinfo_x86 *c) { c->extended_cpuid_level = 0; @@ -921,6 +950,8 @@ static void generic_identify(struct cpuinfo_x86 *c) get_model_name(c); /* Default name */ detect_nopl(c); + + detect_null_seg_behavior(c); } static void x86_init_cache_qos(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 3e2b68d752c9e09c40d76442aa94d3b8e421b0f1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 7 Apr 2016 17:31:47 -0700 Subject: x86/asm, sched/x86: Rewrite the FS and GS context switch code The old code was incomprehensible and was buggy on AMD CPUs. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rudolf Marek Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5f6bde874c6fe6831c6711b5b1522a238ba035b4.1460075211.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 148 +++++++++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 55 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c671b9b015c0..50337eac1ca2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -282,7 +282,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(cpu_tss, cpu); - unsigned fsindex, gsindex; + unsigned prev_fsindex, prev_gsindex; fpu_switch_t fpu_switch; fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); @@ -292,8 +292,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * * (e.g. xen_load_tls()) */ - savesegment(fs, fsindex); - savesegment(gs, gsindex); + savesegment(fs, prev_fsindex); + savesegment(gs, prev_gsindex); /* * Load TLS before restoring any segments so that segment loads @@ -336,66 +336,104 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Switch FS and GS. * * These are even more complicated than DS and ES: they have - * 64-bit bases are that controlled by arch_prctl. Those bases - * only differ from the values in the GDT or LDT if the selector - * is 0. + * 64-bit bases are that controlled by arch_prctl. The bases + * don't necessarily match the selectors, as user code can do + * any number of things to cause them to be inconsistent. * - * Loading the segment register resets the hidden base part of - * the register to 0 or the value from the GDT / LDT. If the - * next base address zero, writing 0 to the segment register is - * much faster than using wrmsr to explicitly zero the base. + * We don't promise to preserve the bases if the selectors are + * nonzero. We also don't promise to preserve the base if the + * selector is zero and the base doesn't match whatever was + * most recently passed to ARCH_SET_FS/GS. 
(If/when the + * FSGSBASE instructions are enabled, we'll need to offer + * stronger guarantees.) * - * The thread_struct.fs and thread_struct.gs values are 0 - * if the fs and gs bases respectively are not overridden - * from the values implied by fsindex and gsindex. They - * are nonzero, and store the nonzero base addresses, if - * the bases are overridden. - * - * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should - * be impossible. - * - * Therefore we need to reload the segment registers if either - * the old or new selector is nonzero, and we need to override - * the base address if next thread expects it to be overridden. - * - * This code is unnecessarily slow in the case where the old and - * new indexes are zero and the new base is nonzero -- it will - * unnecessarily write 0 to the selector before writing the new - * base address. - * - * Note: This all depends on arch_prctl being the only way that - * user code can override the segment base. Once wrfsbase and - * wrgsbase are enabled, most of this code will need to change. + * As an invariant, + * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) is + * impossible. */ - if (unlikely(fsindex | next->fsindex | prev->fs)) { + if (next->fsindex) { + /* Loading a nonzero value into FS sets the index and base. */ loadsegment(fs, next->fsindex); - - /* - * If user code wrote a nonzero value to FS, then it also - * cleared the overridden base address. - * - * XXX: if user code wrote 0 to FS and cleared the base - * address itself, we won't notice and we'll incorrectly - * restore the prior base address next time we reschdule - * the process. - */ - if (fsindex) - prev->fs = 0; + } else { + if (next->fs) { + /* Next index is zero but next base is nonzero. */ + if (prev_fsindex) + loadsegment(fs, 0); + wrmsrl(MSR_FS_BASE, next->fs); + } else { + /* Next base and index are both zero. */ + if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { + /* + * We don't know the previous base and can't + * find out without RDMSR. Forcibly clear it. + */ + loadsegment(fs, __USER_DS); + loadsegment(fs, 0); + } else { + /* + * If the previous index is zero and ARCH_SET_FS + * didn't change the base, then the base is + * also zero and we don't need to do anything. + */ + if (prev->fs || prev_fsindex) + loadsegment(fs, 0); + } + } } - if (next->fs) - wrmsrl(MSR_FS_BASE, next->fs); - prev->fsindex = fsindex; + /* + * Save the old state and preserve the invariant. + * NB: if prev_fsindex == 0, then we can't reliably learn the base + * without RDMSR because Intel user code can zero it without telling + * us and AMD user code can program any 32-bit value without telling + * us. + */ + if (prev_fsindex) + prev->fs = 0; + prev->fsindex = prev_fsindex; - if (unlikely(gsindex | next->gsindex | prev->gs)) { + if (next->gsindex) { + /* Loading a nonzero value into GS sets the index and base. */ load_gs_index(next->gsindex); - - /* This works (and fails) the same way as fsindex above. */ - if (gsindex) - prev->gs = 0; + } else { + if (next->gs) { + /* Next index is zero but next base is nonzero. */ + if (prev_gsindex) + load_gs_index(0); + wrmsrl(MSR_KERNEL_GS_BASE, next->gs); + } else { + /* Next base and index are both zero. */ + if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { + /* + * We don't know the previous base and can't + * find out without RDMSR. Forcibly clear it. + * + * This contains a pointless SWAPGS pair. + * Fixing it would involve an explicit check + * for Xen or a new pvop. 
+ */ + load_gs_index(__USER_DS); + load_gs_index(0); + } else { + /* + * If the previous index is zero and ARCH_SET_GS + * didn't change the base, then the base is + * also zero and we don't need to do anything. + */ + if (prev->gs || prev_gsindex) + load_gs_index(0); + } + } } - if (next->gs) - wrmsrl(MSR_KERNEL_GS_BASE, next->gs); - prev->gsindex = gsindex; + /* + * Save the old state and preserve the invariant. + * NB: if prev_gsindex == 0, then we can't reliably learn the base + * without RDMSR because Intel user code can zero it without telling + * us and AMD user code can program any 32-bit value without telling + * us. + */ + if (prev_gsindex) + prev->gs = 0; + prev->gsindex = prev_gsindex; switch_fpu_finish(next_fpu, fpu_switch); -- cgit v1.2.3 From 0230bb038fa99af0c425fc4cffed307e545a9642 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 7 Apr 2016 17:31:48 -0700 Subject: x86/cpu: Move X86_BUG_ESPFIX initialization to generic_identify() It was in detect_nopl(), which was either a mistake by me or some kind of mis-merge. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rudolf Marek Cc: Thomas Gleixner Fixes: ff236456f072 ("x86/cpu: Move X86_BUG_ESPFIX initialization to generic_identify") Link: http://lkml.kernel.org/r/0949337f13660461edca08ab67d1a841441289c9.1460075211.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 50 ++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8e40eee5843a..28d3255edf00 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -861,31 +861,6 @@ static void detect_nopl(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_NOPL); #else set_cpu_cap(c, X86_FEATURE_NOPL); -#endif - - /* - * ESPFIX is a strange bug. All real CPUs have it. Paravirt - * systems that run Linux at CPL > 0 may or may not have the - * issue, but, even if they have the issue, there's absolutely - * nothing we can do about it because we can't use the real IRET - * instruction. - * - * NB: For the time being, only 32-bit kernels support - * X86_BUG_ESPFIX as such. 64-bit kernels directly choose - * whether to apply espfix using paravirt hooks. If any - * non-paravirt system ever shows up that does *not* have the - * ESPFIX issue, we can change this. - */ -#ifdef CONFIG_X86_32 -#ifdef CONFIG_PARAVIRT - do { - extern void native_iret(void); - if (pv_cpu_ops.iret == native_iret) - set_cpu_bug(c, X86_BUG_ESPFIX); - } while (0); -#else - set_cpu_bug(c, X86_BUG_ESPFIX); -#endif #endif } @@ -952,6 +927,31 @@ static void generic_identify(struct cpuinfo_x86 *c) detect_nopl(c); detect_null_seg_behavior(c); + + /* + * ESPFIX is a strange bug. All real CPUs have it. Paravirt + * systems that run Linux at CPL > 0 may or may not have the + * issue, but, even if they have the issue, there's absolutely + * nothing we can do about it because we can't use the real IRET + * instruction. + * + * NB: For the time being, only 32-bit kernels support + * X86_BUG_ESPFIX as such. 64-bit kernels directly choose + * whether to apply espfix using paravirt hooks. If any + * non-paravirt system ever shows up that does *not* have the + * ESPFIX issue, we can change this. 
+ */ +#ifdef CONFIG_X86_32 +# ifdef CONFIG_PARAVIRT + do { + extern void native_iret(void); + if (pv_cpu_ops.iret == native_iret) + set_cpu_bug(c, X86_BUG_ESPFIX); + } while (0); +# else + set_cpu_bug(c, X86_BUG_ESPFIX); +# endif +#endif } static void x86_init_cache_qos(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 96e5d28ae7a5250f3deb2434f1895c9daf48b1bd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Apr 2016 17:31:49 -0700 Subject: x86/cpu: Add Erratum 88 detection on AMD Erratum 88 affects old AMD K8s, where a SWAPGS fails to cause an input dependency on GS. Therefore, we need to MFENCE before it. But that MFENCE is expensive and unnecessary on the remaining x86 CPUs out there so patch it out on the CPUs which don't require it. Signed-off-by: Borislav Petkov Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rudolf Marek Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/aec6b2df1bfc56101d4e9e2e5d5d570bf41663c6.1460075211.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/amd.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 858b555e274b..64d2033d1e49 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -783,7 +783,7 @@ ENTRY(native_load_gs_index) SWAPGS gs_change: movl %edi, %gs -2: mfence /* workaround */ +2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS popfq ret diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2a052302bc43..7bfb6b70c745 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -295,6 +295,8 @@ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ #define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */ + #ifdef CONFIG_X86_32 /* diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6e47e3a916f1..b7cc9efe08b5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -632,6 +632,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c) */ msr_set_bit(MSR_K7_HWCR, 6); #endif + set_cpu_bug(c, X86_BUG_SWAPGS_FENCE); } static void init_amd_gh(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 42c748bb2544f21c3d115240527fe4478e193641 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Apr 2016 17:31:50 -0700 Subject: x86/entry/64: Make gs_change a local label ... so that it doesn't appear in objdump output. Signed-off-by: Borislav Petkov Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rudolf Marek Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/b9c532a0e5f8d56dede2bd59767d40024d5a75e2.1460075211.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 64d2033d1e49..1693c17dbf81 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -781,7 +781,7 @@ ENTRY(native_load_gs_index) pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS -gs_change: +.Lgs_change: movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS @@ -789,7 +789,7 @@ gs_change: ret END(native_load_gs_index) - _ASM_EXTABLE(gs_change, bad_gs) + _ASM_EXTABLE(.Lgs_change, bad_gs) .section .fixup, "ax" /* running with kernelgs */ bad_gs: @@ -1019,13 +1019,13 @@ ENTRY(error_entry) movl %ecx, %eax /* zero extend */ cmpq %rax, RIP+8(%rsp) je .Lbstep_iret - cmpq $gs_change, RIP+8(%rsp) + cmpq $.Lgs_change, RIP+8(%rsp) jne .Lerror_entry_done /* - * hack: gs_change can fail with user gsbase. If this happens, fix up + * hack: .Lgs_change can fail with user gsbase. If this happens, fix up * gsbase and proceed. We'll fix up the exception and land in - * gs_change's error handler with kernel gsbase. + * .Lgs_change's error handler with kernel gsbase. */ jmp .Lerror_entry_from_usermode_swapgs -- cgit v1.2.3 From 1ed95e52d902035e39a715ff3a314a893a96e5b7 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 7 Apr 2016 17:16:59 -0700 Subject: x86/vdso: Remove direct HPET access through the vDSO Allowing user code to map the HPET is problematic. HPET implementations are notoriously buggy, and there are probably many machines on which even MMIO reads from bogus HPET addresses are problematic. We have a report that the Dell Precision M2800 with: ACPI: HPET 0x00000000C8FE6238 000038 (v01 DELL CBX3 01072009 AMI. 00000005) is either so slow when accessing the HPET or actually hangs in some regard, causing soft lockups to be reported if users do unexpected things to the HPET. The vclock HPET code has also always been a questionable speedup. Accessing an HPET is exceedingly slow (on the order of several microseconds), so the added overhead in requiring a syscall to read the HPET is a small fraction of the total code of accessing it. To avoid future problems, let's just delete the code entirely. In the long run, this could actually be a speedup. Waiman Long as a patch to optimize the case where multiple CPUs contend for the HPET, but that won't help unless all the accesses are mediated by the kernel. 
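As a rough sanity check of that overhead argument (ballpark figures, not taken from the changelog): a syscall entry/exit costs on the order of a hundred nanoseconds, while a single HPET MMIO read takes several microseconds, so routing HPET-based clock reads through the kernel adds only a few percent to their total cost:

	~0.1 us syscall overhead  vs.  ~2-3 us per HPET read  =>  well under 10% added latency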
Reported-by: Rasmus Villemoes Signed-off-by: Andy Lutomirski Reviewed-by: Thomas Gleixner Acked-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Waiman Long Cc: Waiman Long Link: http://lkml.kernel.org/r/d2f90bba98db9905041cff294646d290d378f67a.1460074438.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 15 --------------- arch/x86/entry/vdso/vdso-layout.lds.S | 5 ++--- arch/x86/entry/vdso/vma.c | 11 ----------- arch/x86/include/asm/clocksource.h | 9 ++++----- arch/x86/kernel/hpet.c | 1 - arch/x86/kvm/trace.h | 3 +-- 6 files changed, 7 insertions(+), 37 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 03c3eb77bfce..2f02d23a05ef 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -13,7 +13,6 @@ #include #include -#include #include #include #include @@ -28,16 +27,6 @@ extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); -#ifdef CONFIG_HPET_TIMER -extern u8 hpet_page - __attribute__((visibility("hidden"))); - -static notrace cycle_t vread_hpet(void) -{ - return *(const volatile u32 *)(&hpet_page + HPET_COUNTER); -} -#endif - #ifdef CONFIG_PARAVIRT_CLOCK extern u8 pvclock_page __attribute__((visibility("hidden"))); @@ -195,10 +184,6 @@ notrace static inline u64 vgetsns(int *mode) if (gtod->vclock_mode == VCLOCK_TSC) cycles = vread_tsc(); -#ifdef CONFIG_HPET_TIMER - else if (gtod->vclock_mode == VCLOCK_HPET) - cycles = vread_hpet(); -#endif #ifdef CONFIG_PARAVIRT_CLOCK else if (gtod->vclock_mode == VCLOCK_PVCLOCK) cycles = vread_pvclock(mode); diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 4158acc17df0..a708aa90b507 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -25,7 +25,7 @@ SECTIONS * segment. */ - vvar_start = . - 3 * PAGE_SIZE; + vvar_start = . - 2 * PAGE_SIZE; vvar_page = vvar_start; /* Place all vvars at the offsets in asm/vvar.h. */ @@ -35,8 +35,7 @@ SECTIONS #undef __VVAR_KERNEL_LDS #undef EMIT_VVAR - hpet_page = vvar_start + PAGE_SIZE; - pvclock_page = vvar_start + 2 * PAGE_SIZE; + pvclock_page = vvar_start + PAGE_SIZE; . 
= SIZEOF_HEADERS; diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 10f704584922..b3cf81333a54 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -129,16 +128,6 @@ static int vvar_fault(const struct vm_special_mapping *sm, if (sym_offset == image->sym_vvar_page) { ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, __pa_symbol(&__vvar_page) >> PAGE_SHIFT); - } else if (sym_offset == image->sym_hpet_page) { -#ifdef CONFIG_HPET_TIMER - if (hpet_address && vclock_was_used(VCLOCK_HPET)) { - ret = vm_insert_pfn_prot( - vma, - (unsigned long)vmf->virtual_address, - hpet_address >> PAGE_SHIFT, - pgprot_noncached(PAGE_READONLY)); - } -#endif } else if (sym_offset == image->sym_pvclock_page) { struct pvclock_vsyscall_time_info *pvti = pvclock_pvti_cpu0_va(); diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index d194266acb28..eae33c7170c8 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -3,11 +3,10 @@ #ifndef _ASM_X86_CLOCKSOURCE_H #define _ASM_X86_CLOCKSOURCE_H -#define VCLOCK_NONE 0 /* No vDSO clock available. */ -#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ -#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ -#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */ -#define VCLOCK_MAX 3 +#define VCLOCK_NONE 0 /* No vDSO clock available. */ +#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ +#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */ +#define VCLOCK_MAX 2 struct arch_clocksource_data { int vclock_mode; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a1f0e4a5c47e..7282c2e3858e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -773,7 +773,6 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .resume = hpet_resume_counter, - .archdata = { .vclock_mode = VCLOCK_HPET }, }; static int hpet_clocksource_register(void) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 2f1ea2f61e1f..b72743c5668d 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -809,8 +809,7 @@ TRACE_EVENT(kvm_write_tsc_offset, #define host_clocks \ {VCLOCK_NONE, "none"}, \ - {VCLOCK_TSC, "tsc"}, \ - {VCLOCK_HPET, "hpet"} \ + {VCLOCK_TSC, "tsc"} \ TRACE_EVENT(kvm_update_master_clock, TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched), -- cgit v1.2.3 From 482dd2ef124484601adea82e5e806e81e2bc5521 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Apr 2016 22:43:59 +0200 Subject: x86/syscalls: Wire up compat readv2/writev2 syscalls Reported-by: David Smith Tested-by: David Smith Signed-off-by: Christoph Hellwig Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160407204359.GA3720@lst.de Signed-off-by: Ingo Molnar --- arch/x86/entry/syscalls/syscall_32.tbl | 4 ++-- arch/x86/entry/syscalls/syscall_64.tbl | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index b30dd8154cc2..4cddd17153fb 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -384,5 +384,5 @@ 375 i386 membarrier sys_membarrier 376 i386 mlock2 sys_mlock2 377 i386 copy_file_range sys_copy_file_range -378 i386 preadv2 sys_preadv2 -379 i386 pwritev2 sys_pwritev2 +378 i386 preadv2 sys_preadv2 compat_sys_preadv2 +379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index cac6d17ce5db..555263e385c9 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -374,3 +374,5 @@ 543 x32 io_setup compat_sys_io_setup 544 x32 io_submit compat_sys_io_submit 545 x32 execveat compat_sys_execveat/ptregs +534 x32 preadv2 compat_sys_preadv2 +535 x32 pwritev2 compat_sys_pwritev2 -- cgit v1.2.3 From 1886297ce0c8d563a08c8a8c4c0b97743e06cd37 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Mon, 11 Apr 2016 13:36:00 -0600 Subject: x86/mm/pat: Fix BUG_ON() in mmap_mem() on QEMU/i386 The following BUG_ON() crash was reported on QEMU/i386: kernel BUG at arch/x86/mm/physaddr.c:79! Call Trace: phys_mem_access_prot_allowed mmap_mem ? mmap_region mmap_region do_mmap vm_mmap_pgoff SyS_mmap_pgoff do_int80_syscall_32 entry_INT80_32 after commit: edfe63ec97ed ("x86/mtrr: Fix Xorg crashes in Qemu sessions") PAT is now set to disabled state when MTRRs are disabled. Thus, reactivating the __pa(high_memory) check in phys_mem_access_prot_allowed(). When CONFIG_DEBUG_VIRTUAL is set, __pa() calls __phys_addr(), which in turn calls slow_virt_to_phys() for 'high_memory'. Because 'high_memory' is set to (the max direct mapped virt addr + 1), it is not a valid virtual address. Hence, slow_virt_to_phys() returns 0 and hit the BUG_ON. Using __pa_nodebug() instead of __pa() will fix this BUG_ON. However, this code block, originally written for Pentiums and earlier, is no longer adequate since a 32-bit Xen guest has MTRRs disabled and supports ZONE_HIGHMEM. In this setup, this code sets UC attribute for accessing RAM in high memory range. Delete this code block as it has been unused for a long time. Reported-by: kernel test robot Reviewed-by: Borislav Petkov Signed-off-by: Toshi Kani Cc: Andrew Morton Cc: David Vrabel Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1460403360-25441-1-git-send-email-toshi.kani@hpe.com Link: https://lkml.org/lkml/2016/4/1/608 Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index c4c3ddcc9069..fb0604f11eec 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -778,25 +778,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (file->f_flags & O_DSYNC) pcm = _PAGE_CACHE_MODE_UC_MINUS; -#ifdef CONFIG_X86_32 - /* - * On the PPro and successors, the MTRRs are used to set - * memory types for physical addresses outside main memory, - * so blindly setting UC or PWT on those pages is wrong. 
- * For Pentiums and earlier, the surround logic should disable - * caching for the high addresses through the KEN pin, but - * we maintain the tradition of paranoia in this code. - */ - if (!pat_enabled() && - !(boot_cpu_has(X86_FEATURE_MTRR) || - boot_cpu_has(X86_FEATURE_K6_MTRR) || - boot_cpu_has(X86_FEATURE_CYRIX_ARR) || - boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && - (pfn << PAGE_SHIFT) >= __pa(high_memory)) { - pcm = _PAGE_CACHE_MODE_UC; - } -#endif - *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | cachemode2protval(pcm)); return 1; -- cgit v1.2.3 From abcfdfe07de75f830cbec1aa3eb17833a0166697 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:24:54 +0200 Subject: x86/cpufeature: Replace cpu_has_avx2 with boot_cpu_has() usage Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-crypto@vger.kernel.org Link: http://lkml.kernel.org/r/1459801503-15600-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/crypto/camellia_aesni_avx2_glue.c | 2 +- arch/x86/crypto/chacha20_glue.c | 2 +- arch/x86/crypto/poly1305_glue.c | 2 +- arch/x86/crypto/serpent_avx2_glue.c | 2 +- arch/x86/include/asm/cpufeature.h | 1 - 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index c37f7028c85a..39389662e29b 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -562,7 +562,7 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || + if (!boot_cpu_has(X86_FEATURE_AVX2) || !cpu_has_avx || !cpu_has_aes || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX2 or AES-NI instructions are not detected.\n"); return -ENODEV; diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 8baaff5af0b5..cea061e137da 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -129,7 +129,7 @@ static int __init chacha20_simd_mod_init(void) return -ENODEV; #ifdef CONFIG_AS_AVX2 - chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 && + chacha20_use_avx2 = cpu_has_avx && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #endif return crypto_register_alg(&alg); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index b283868acdf8..ea21d2e440f7 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -183,7 +183,7 @@ static int __init poly1305_simd_mod_init(void) return -ENODEV; #ifdef CONFIG_AS_AVX2 - poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 && + poly1305_use_avx2 = cpu_has_avx && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); alg.descsize = sizeof(struct poly1305_simd_desc_ctx); if (poly1305_use_avx2) diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 408cae2b3543..870f6d812a2d 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -538,7 +538,7 @@ static int __init init(void) { const char *feature_name; - if (!cpu_has_avx2 || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX2 instructions are not detected.\n"); return -ENODEV; } diff --git 
a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index c594e04bf529..810166530cbf 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -125,7 +125,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) #define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) -#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) /* -- cgit v1.2.3 From 1f4dd7938ea575a2d1972e180eaef31e6edb1808 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:24:55 +0200 Subject: x86/cpufeature: Replace cpu_has_aes with boot_cpu_has() usage Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-crypto@vger.kernel.org Link: http://lkml.kernel.org/r/1459801503-15600-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/crypto/camellia_aesni_avx2_glue.c | 3 ++- arch/x86/crypto/camellia_aesni_avx_glue.c | 4 +++- arch/x86/include/asm/cpufeature.h | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 39389662e29b..c07f699826a0 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -562,7 +562,8 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!boot_cpu_has(X86_FEATURE_AVX2) || !cpu_has_avx || !cpu_has_aes || + if (!boot_cpu_has(X86_FEATURE_AVX2) || !cpu_has_avx || + !boot_cpu_has(X86_FEATURE_AES) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX2 or AES-NI instructions are not detected.\n"); return -ENODEV; diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 65f64556725b..6d256d59c5fd 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -554,7 +554,9 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!cpu_has_avx || !cpu_has_aes || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + if (!cpu_has_avx || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX or AES-NI instructions are not detected.\n"); return -ENODEV; } diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 810166530cbf..a6627b30bf45 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -123,7 +123,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) -#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) -- cgit v1.2.3 From da154e82af4d0c63e2334d5b3822426600b0490f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:24:56 +0200 Subject: x86/cpufeature: Replace cpu_has_avx with boot_cpu_has() usage Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-crypto@vger.kernel.org Link: http://lkml.kernel.org/r/1459801503-15600-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/crypto/aesni-intel_glue.c | 2 +- arch/x86/crypto/camellia_aesni_avx2_glue.c | 3 ++- arch/x86/crypto/camellia_aesni_avx_glue.c | 2 +- arch/x86/crypto/chacha20_glue.c | 3 ++- arch/x86/crypto/poly1305_glue.c | 3 ++- arch/x86/crypto/sha1_ssse3_glue.c | 2 +- arch/x86/crypto/sha256_ssse3_glue.c | 2 +- arch/x86/crypto/sha512_ssse3_glue.c | 2 +- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/include/asm/xor_avx.h | 4 ++-- 10 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 064c7e2bd7c8..5b7fa1471007 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1477,7 +1477,7 @@ static int __init aesni_init(void) } aesni_ctr_enc_tfm = aesni_ctr_enc; #ifdef CONFIG_AS_AVX - if (cpu_has_avx) { + if (boot_cpu_has(X86_FEATURE_AVX)) { /* optimize performance of ctr mode encryption transform */ aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; pr_info("AES CTR mode by8 optimization enabled\n"); diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index c07f699826a0..60907c139c4e 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -562,7 +562,8 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!boot_cpu_has(X86_FEATURE_AVX2) || !cpu_has_avx || + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_AES) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX2 or AES-NI instructions are not detected.\n"); diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 6d256d59c5fd..d96429da88eb 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -554,7 +554,7 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!cpu_has_avx || + if (!boot_cpu_has(X86_FEATURE_AVX) || !boot_cpu_has(X86_FEATURE_AES) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX or AES-NI instructions are not detected.\n"); diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index cea061e137da..2d5c2e0bd939 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -129,7 +129,8 @@ static int __init chacha20_simd_mod_init(void) return -ENODEV; #ifdef CONFIG_AS_AVX2 - chacha20_use_avx2 = cpu_has_avx && boot_cpu_has(X86_FEATURE_AVX2) && + chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #endif return crypto_register_alg(&alg); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index ea21d2e440f7..e32142bc071d 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -183,7 +183,8 @@ static int __init poly1305_simd_mod_init(void) return -ENODEV; #ifdef CONFIG_AS_AVX2 - poly1305_use_avx2 = cpu_has_avx && boot_cpu_has(X86_FEATURE_AVX2) && + poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); alg.descsize = sizeof(struct poly1305_simd_desc_ctx); if (poly1305_use_avx2) diff --git 
a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index dd14616b7739..1024e378a358 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -166,7 +166,7 @@ static struct shash_alg sha1_avx_alg = { static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - if (cpu_has_avx) + if (boot_cpu_has(X86_FEATURE_AVX)) pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 5f4d6086dc59..3ae0f43ebd37 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -201,7 +201,7 @@ static struct shash_alg sha256_avx_algs[] = { { static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - if (cpu_has_avx) + if (boot_cpu_has(X86_FEATURE_AVX)) pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 34e5083d6f36..0b17c83d027d 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -151,7 +151,7 @@ asmlinkage void sha512_transform_avx(u64 *digest, const char *data, static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - if (cpu_has_avx) + if (boot_cpu_has(X86_FEATURE_AVX)) pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a6627b30bf45..3b232a120a5d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -123,7 +123,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) -#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) /* diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index e45e556140af..22a7b1870a31 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -167,12 +167,12 @@ static struct xor_block_template xor_block_avx = { #define AVX_XOR_SPEED \ do { \ - if (cpu_has_avx && boot_cpu_has(X86_FEATURE_OSXSAVE)) \ + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \ xor_speed(&xor_block_avx); \ } while (0) #define AVX_SELECT(FASTEST) \ - (cpu_has_avx && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST) + (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST) #else -- cgit v1.2.3 From dda9edf7c1fdc0d7a7ed7f46299a26282190fb6d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:24:57 +0200 Subject: x86/cpufeature: Replace cpu_has_xmm with boot_cpu_has() usage Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459801503-15600-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/include/asm/xor_32.h | 2 +- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/init.c | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3b232a120a5d..6463258b4619 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -122,7 +122,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) -#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) /* diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index c54beb44c4c1..635eac543922 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -550,7 +550,7 @@ static struct xor_block_template xor_block_pIII_sse = { #define XOR_TRY_TEMPLATES \ do { \ AVX_XOR_SPEED; \ - if (cpu_has_xmm) { \ + if (boot_cpu_has(X86_FEATURE_XMM)) { \ xor_speed(&xor_block_pIII_sse); \ xor_speed(&xor_block_sse_pf64); \ } else if (boot_cpu_has(X86_FEATURE_MMX)) { \ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8e37cc8a539a..b05aa68f88c0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -526,7 +526,7 @@ static inline unsigned short get_fpu_swd(struct fpu *fpu) static inline unsigned short get_fpu_mxcsr(struct fpu *fpu) { - if (cpu_has_xmm) { + if (boot_cpu_has(X86_FEATURE_XMM)) { return fpu->state.fxsave.mxcsr; } else { return MXCSR_DEFAULT; diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 54c86fffbf9f..9bbb332a71ff 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -31,7 +31,7 @@ static void fpu__init_cpu_generic(void) if (cpu_has_fxsr) cr4_mask |= X86_CR4_OSFXSR; - if (cpu_has_xmm) + if (boot_cpu_has(X86_FEATURE_XMM)) cr4_mask |= X86_CR4_OSXMMEXCPT; if (cr4_mask) cr4_set_bits(cr4_mask); -- cgit v1.2.3 From a402a8dffc9f838b413c5ee0317d2d3184968f5b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:24:58 +0200 Subject: x86/cpufeature: Replace cpu_has_fpu with boot_cpu_has() usage Use static_cpu_has() in the timing-sensitive paths in fpstate_init() and fpu__copy(). While at it, simplify the use in init_cyrix() and get rid of the ternary operator. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459801503-15600-6-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kernel/cpu/cyrix.c | 2 +- arch/x86/kernel/fpu/bugs.c | 2 +- arch/x86/kernel/fpu/core.c | 4 ++-- arch/x86/kernel/fpu/init.c | 8 ++++---- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 6463258b4619..b23d5570a5f4 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -118,7 +118,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) -#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 6adef9cac23e..bd9dcd6b712d 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -333,7 +333,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) switch (dir0_lsn) { case 0xd: /* either a 486SLC or DLC w/o DEVID */ dir0_msn = 0; - p = Cx486_name[(cpu_has_fpu ? 1 : 0)]; + p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)]; break; case 0xe: /* a 486S A step */ diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index dd9ca9b60ff3..224b5ec52195 100644 --- a/arch/x86/kernel/fpu/bugs.c +++ b/arch/x86/kernel/fpu/bugs.c @@ -66,6 +66,6 @@ void __init fpu__init_check_bugs(void) * kernel_fpu_begin/end() in check_fpu() relies on the patched * alternative instructions. */ - if (cpu_has_fpu) + if (boot_cpu_has(X86_FEATURE_FPU)) check_fpu(); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b05aa68f88c0..0e7859f9aedc 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -217,7 +217,7 @@ static inline void fpstate_init_fstate(struct fregs_state *fp) void fpstate_init(union fpregs_state *state) { - if (!cpu_has_fpu) { + if (!static_cpu_has(X86_FEATURE_FPU)) { fpstate_init_soft(&state->soft); return; } @@ -237,7 +237,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) dst_fpu->fpregs_active = 0; dst_fpu->last_cpu = -1; - if (!src_fpu->fpstate_active || !cpu_has_fpu) + if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != ¤t->thread.fpu); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 9bbb332a71ff..3a84275f012e 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -38,13 +38,13 @@ static void fpu__init_cpu_generic(void) cr0 = read_cr0(); cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ - if (!cpu_has_fpu) + if (!boot_cpu_has(X86_FEATURE_FPU)) cr0 |= X86_CR0_EM; write_cr0(cr0); /* Flush out any pending x87 state: */ #ifdef CONFIG_MATH_EMULATION - if (!cpu_has_fpu) + if (!boot_cpu_has(X86_FEATURE_FPU)) fpstate_init_soft(¤t->thread.fpu.state.soft); else #endif @@ -89,7 +89,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) } #ifndef CONFIG_MATH_EMULATION - if (!cpu_has_fpu) { + if (!boot_cpu_has(X86_FEATURE_FPU)) { pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); for (;;) asm volatile("hlt"); @@ -212,7 +212,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) * fpu__init_system_xstate(). 
*/ - if (!cpu_has_fpu) { + if (!boot_cpu_has(X86_FEATURE_FPU)) { /* * Disable xsave as we do not support it if i387 * emulation is enabled. -- cgit v1.2.3 From 59e21e3d00e6bc23186763c3e0bf11baf8924124 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:24:59 +0200 Subject: x86/cpufeature: Replace cpu_has_tsc with boot_cpu_has() usage Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Torokhov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Sailer Link: http://lkml.kernel.org/r/1459801503-15600-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/include/asm/tsc.h | 2 +- arch/x86/kernel/apic/apic.c | 10 +++++----- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/tsc.c | 12 ++++++------ drivers/input/joystick/analog.c | 6 +++--- drivers/net/hamradio/baycom_epp.c | 8 ++++---- 7 files changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index b23d5570a5f4..8f58cd215f6d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -118,7 +118,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) -#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 174c4212780a..7428697c5b8d 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -22,7 +22,7 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { #ifndef CONFIG_X86_TSC - if (!cpu_has_tsc) + if (!boot_cpu_has(X86_FEATURE_TSC)) return 0; #endif diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d7867c885bf8..0b6509f1a4fe 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -607,7 +607,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) long tapic = apic_read(APIC_TMCCT); unsigned long pm = acpi_pm_read_early(); - if (cpu_has_tsc) + if (boot_cpu_has(X86_FEATURE_TSC)) tsc = rdtsc(); switch (lapic_cal_loops++) { @@ -668,7 +668,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) *delta = (long)res; /* Correct the tsc counter value */ - if (cpu_has_tsc) { + if (boot_cpu_has(X86_FEATURE_TSC)) { res = (((u64)(*deltatsc)) * pm_100ms); do_div(res, deltapm); apic_printk(APIC_VERBOSE, "TSC delta adjusted to " @@ -760,7 +760,7 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", lapic_timer_frequency); - if (cpu_has_tsc) { + if (boot_cpu_has(X86_FEATURE_TSC)) { apic_printk(APIC_VERBOSE, "..... CPU clock speed is " "%ld.%04ld MHz.\n", (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), @@ -1227,7 +1227,7 @@ void setup_local_APIC(void) unsigned long long tsc = 0, ntsc; long long max_loops = cpu_khz ? 
cpu_khz : 1000000; - if (cpu_has_tsc) + if (boot_cpu_has(X86_FEATURE_TSC)) tsc = rdtsc(); if (disable_apic) { @@ -1311,7 +1311,7 @@ void setup_local_APIC(void) break; } if (queued) { - if (cpu_has_tsc && cpu_khz) { + if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { ntsc = rdtsc(); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 28d3255edf00..6bfa36de6d9f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1558,7 +1558,7 @@ void cpu_init(void) pr_info("Initializing CPU#%d\n", cpu); if (cpu_feature_enabled(X86_FEATURE_VME) || - cpu_has_tsc || + boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c9c4c7ce3eb2..a0346bc51833 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -36,7 +36,7 @@ static int __read_mostly tsc_unstable; /* native_sched_clock() is called before tsc_init(), so we must start with the TSC soft disabled to prevent - erroneous rdtsc usage on !cpu_has_tsc processors */ + erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */ static int __read_mostly tsc_disabled = -1; static DEFINE_STATIC_KEY_FALSE(__use_tsc); @@ -834,7 +834,7 @@ int recalibrate_cpu_khz(void) #ifndef CONFIG_SMP unsigned long cpu_khz_old = cpu_khz; - if (cpu_has_tsc) { + if (boot_cpu_has(X86_FEATURE_TSC)) { tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = @@ -956,7 +956,7 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { - if (!cpu_has_tsc) + if (!boot_cpu_has(X86_FEATURE_TSC)) return 0; if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) return 0; @@ -1081,7 +1081,7 @@ static void __init check_system_tsc_reliable(void) */ int unsynchronized_tsc(void) { - if (!cpu_has_tsc || tsc_unstable) + if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable) return 1; #ifdef CONFIG_SMP @@ -1205,7 +1205,7 @@ out: static int __init init_tsc_clocksource(void) { - if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz) + if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) return 0; if (tsc_clocksource_reliable) @@ -1242,7 +1242,7 @@ void __init tsc_init(void) u64 lpj; int cpu; - if (!cpu_has_tsc) { + if (!boot_cpu_has(X86_FEATURE_TSC)) { setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 6f8b084e13d0..3d8ff09eba57 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -143,9 +143,9 @@ struct analog_port { #include -#define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0) -#define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0))) -#define TIME_NAME (cpu_has_tsc?"TSC":"PIT") +#define GET_TIME(x) do { if (boot_cpu_has(X86_FEATURE_TSC)) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0) +#define DELTA(x,y) (boot_cpu_has(X86_FEATURE_TSC) ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? 
PIT_TICK_RATE / HZ : 0))) +#define TIME_NAME (boot_cpu_has(X86_FEATURE_TSC)?"TSC":"PIT") static unsigned int get_time_pit(void) { unsigned long flags; diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 72c9f1f352b4..7c7830722ea2 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -635,10 +635,10 @@ static int receive(struct net_device *dev, int cnt) #ifdef __i386__ #include -#define GETTICK(x) \ -({ \ - if (cpu_has_tsc) \ - x = (unsigned int)rdtsc(); \ +#define GETTICK(x) \ +({ \ + if (boot_cpu_has(X86_FEATURE_TSC)) \ + x = (unsigned int)rdtsc(); \ }) #else /* __i386__ */ #define GETTICK(x) -- cgit v1.2.3 From 93984fbd4e33cc861d5b49caed02a02cbfb01340 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:25:00 +0200 Subject: x86/cpufeature: Replace cpu_has_apic with boot_cpu_has() usage Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: iommu@lists.linux-foundation.org Cc: linux-pm@vger.kernel.org Cc: oprofile-list@lists.sf.net Link: http://lkml.kernel.org/r/1459801503-15600-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/include/asm/irq_work.h | 2 +- arch/x86/kernel/acpi/boot.c | 8 ++++---- arch/x86/kernel/apic/apic.c | 20 ++++++++++---------- arch/x86/kernel/apic/apic_noop.c | 4 ++-- arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/apic/ipi.c | 2 +- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/cpu/amd.c | 4 ++-- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel.c | 2 +- arch/x86/kernel/cpu/mcheck/therm_throt.c | 2 +- arch/x86/kernel/devicetree.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/oprofile/nmi_int.c | 2 +- arch/x86/pci/xen.c | 2 +- drivers/cpufreq/longhaul.c | 2 +- drivers/iommu/irq_remapping.c | 2 +- 19 files changed, 32 insertions(+), 33 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 041e442a3e28..54c17455600e 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1518,7 +1518,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) static void __init pmu_check_apic(void) { - if (cpu_has_apic) + if (boot_cpu_has(X86_FEATURE_APIC)) return; x86_pmu.apic = 0; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 8f58cd215f6d..c532961c7439 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -118,7 +118,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) -#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index d0afb05c84fc..f70604125286 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -5,7 +5,7 @@ static inline bool arch_irq_work_has_interrupt(void) { - return cpu_has_apic; + return boot_cpu_has(X86_FEATURE_APIC); } #endif /* _ASM_IRQ_WORK_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8c2f1ef6ca23..2522e564269e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -136,7 +136,7 @@ 
static int __init acpi_parse_madt(struct acpi_table_header *table) { struct acpi_table_madt *madt = NULL; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -EINVAL; madt = (struct acpi_table_madt *)table; @@ -951,7 +951,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) { int count; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; /* @@ -979,7 +979,7 @@ static int __init acpi_parse_madt_lapic_entries(void) int ret; struct acpi_subtable_proc madt_proc[2]; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; /* @@ -1125,7 +1125,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) if (acpi_disabled || acpi_noirq) return -ENODEV; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; /* diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0b6509f1a4fe..60078a67d7e3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1085,7 +1085,7 @@ void lapic_shutdown(void) { unsigned long flags; - if (!cpu_has_apic && !apic_from_smp_config()) + if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) return; local_irq_save(flags); @@ -1134,7 +1134,7 @@ void __init init_bsp_APIC(void) * Don't do the setup now if we have a SMP BIOS as the * through-I/O-APIC virtual wire mode might be active. */ - if (smp_found_config || !cpu_has_apic) + if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) return; /* @@ -1445,7 +1445,7 @@ static void __x2apic_disable(void) { u64 msr; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return; rdmsrl(MSR_IA32_APICBASE, msr); @@ -1632,7 +1632,7 @@ void __init enable_IR_x2apic(void) */ static int __init detect_init_APIC(void) { - if (!cpu_has_apic) { + if (!boot_cpu_has(X86_FEATURE_APIC)) { pr_info("No local APIC present\n"); return -1; } @@ -1711,14 +1711,14 @@ static int __init detect_init_APIC(void) goto no_apic; case X86_VENDOR_INTEL: if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) + (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC))) break; goto no_apic; default: goto no_apic; } - if (!cpu_has_apic) { + if (!boot_cpu_has(X86_FEATURE_APIC)) { /* * Over-ride BIOS and try to enable the local APIC only if * "lapic" specified. @@ -2233,19 +2233,19 @@ int __init APIC_init_uniprocessor(void) return -1; } #ifdef CONFIG_X86_64 - if (!cpu_has_apic) { + if (!boot_cpu_has(X86_FEATURE_APIC)) { disable_apic = 1; pr_info("Apic disabled by BIOS\n"); return -1; } #else - if (!smp_found_config && !cpu_has_apic) + if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC)) return -1; /* * Complain if the BIOS pretends there is one. */ - if (!cpu_has_apic && + if (!boot_cpu_has(X86_FEATURE_APIC) && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { pr_err("BIOS bug, local APIC 0x%x not detected!...\n", boot_cpu_physical_apicid); @@ -2426,7 +2426,7 @@ static void apic_pm_activate(void) static int __init init_lapic_sysfs(void) { /* XXX: remove suspend/resume procs if !apic_pm_state.active? 
*/ - if (cpu_has_apic) + if (boot_cpu_has(X86_FEATURE_APIC)) register_syscore_ops(&lapic_syscore_ops); return 0; diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 331a7a07c48f..13d19ed58514 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -100,13 +100,13 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, static u32 noop_apic_read(u32 reg) { - WARN_ON_ONCE((cpu_has_apic && !disable_apic)); + WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); return 0; } static void noop_apic_write(u32 reg, u32 v) { - WARN_ON_ONCE(cpu_has_apic && !disable_apic); + WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); } struct apic apic_noop = { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index fdb0fbfb1197..84e33ff5a6d5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1454,7 +1454,7 @@ void native_disable_io_apic(void) ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } - if (cpu_has_apic || apic_from_smp_config()) + if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config()) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 28bde88b0085..2a0f225afebd 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -230,7 +230,7 @@ int safe_smp_processor_id(void) { int apicid, cpuid; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return 0; apicid = hard_smp_processor_id(); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index ad59d70bcb1a..26d3ccc63e40 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -943,7 +943,7 @@ static int __init print_ICs(void) print_PIC(); /* don't print out if apic is not there */ - if (!cpu_has_apic && !apic_from_smp_config()) + if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) return 0; print_local_APICs(show_lapic); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 19d7dcfc8b3e..54f7b44dcf01 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -565,9 +565,9 @@ static void early_init_amd(struct cpuinfo_x86 *c) * can safely set X86_FEATURE_EXTD_APICID unconditionally for families * after 16h. */ - if (cpu_has_apic && c->x86 > 0x16) { + if (boot_cpu_has(X86_FEATURE_APIC) && c->x86 > 0x16) { set_cpu_cap(c, X86_FEATURE_EXTD_APICID); - } else if (cpu_has_apic && c->x86 >= 0xf) { + } else if (boot_cpu_has(X86_FEATURE_APIC) && c->x86 >= 0xf) { /* check CPU config space for extended APIC ID */ unsigned int val; val = read_pci_config(0, 24, 0, 0x68); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f71a34944b56..1d5582259b20 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -281,7 +281,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * integrated APIC (see 11AP erratum in "Pentium Processor * Specification Update"). 
*/ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && (c->x86_mask < 0x6 || c->x86_mask == 0xb)) set_cpu_bug(c, X86_BUG_11AP); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 1e8bb6c94f14..1defb8ea882c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -84,7 +84,7 @@ static int cmci_supported(int *banks) */ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return 0; - if (!cpu_has_apic || lapic_get_maxlvt() < 6) + if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) return 0; rdmsrl(MSR_IA32_MCG_CAP, cap); *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 0b445c2ff735..615793321c49 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -447,7 +447,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs) /* Thermal monitoring depends on APIC, ACPI and clock modulation */ static int intel_thermal_supported(struct cpuinfo_x86 *c) { - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return 0; if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) return 0; diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 1f4acd68b98b..3fe45f84ced4 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -151,7 +151,7 @@ static void __init dtb_lapic_setup(void) return; /* Did the boot loader setup the local APIC ? */ - if (!cpu_has_apic) { + if (!boot_cpu_has(X86_FEATURE_APIC)) { if (apic_force_enable(r.start)) return; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a2065d3b3b39..1fe4130b14d9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1231,7 +1231,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * If we couldn't find a local APIC, then get out of here now! */ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && - !cpu_has_apic) { + !boot_cpu_has(X86_FEATURE_APIC)) { if (!disable_apic) { pr_err("BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 25171e9595f7..28c04123b6dd 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -700,7 +700,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) char *cpu_type = NULL; int ret = 0; - if (!cpu_has_apic) + if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; if (force_cpu_type == timer) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index beac4dfdade6..4bd08b0fc8ea 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -445,7 +445,7 @@ void __init xen_msi_init(void) uint32_t eax = cpuid_eax(xen_cpuid_base() + 4); if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) || - ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && cpu_has_apic)) + ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC))) return; } diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index 0f6b229afcb9..247bfa8eaddb 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -945,7 +945,7 @@ static int __init longhaul_init(void) } #endif #ifdef CONFIG_X86_IO_APIC - if (cpu_has_apic) { + if (boot_cpu_has(X86_FEATURE_APIC)) { printk(KERN_ERR PFX "APIC detected. 
Longhaul is currently " "broken in this configuration.\n"); return -ENODEV; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 8adaaeae3268..49721b4e1975 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -36,7 +36,7 @@ static void irq_remapping_disable_io_apic(void) * As this gets called during crash dump, keep this simple for * now. */ - if (cpu_has_apic || apic_from_smp_config()) + if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config()) disconnect_bsp_APIC(0); } -- cgit v1.2.3 From 01f8fd7379149fb9a4046e76617958bf771f856f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:25:01 +0200 Subject: x86/cpufeature: Replace cpu_has_fxsr with boot_cpu_has() usage Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459801503-15600-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kernel/fpu/core.c | 6 +++--- arch/x86/kernel/fpu/init.c | 6 +++--- arch/x86/kernel/fpu/regset.c | 13 ++++++++----- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index c532961c7439..526381a14546 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -118,7 +118,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) -#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) /* diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0e7859f9aedc..1551b28398a4 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -224,7 +224,7 @@ void fpstate_init(union fpregs_state *state) memset(state, 0, xstate_size); - if (cpu_has_fxsr) + if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else fpstate_init_fstate(&state->fsave); @@ -508,7 +508,7 @@ void fpu__clear(struct fpu *fpu) static inline unsigned short get_fpu_cwd(struct fpu *fpu) { - if (cpu_has_fxsr) { + if (boot_cpu_has(X86_FEATURE_FXSR)) { return fpu->state.fxsave.cwd; } else { return (unsigned short)fpu->state.fsave.cwd; @@ -517,7 +517,7 @@ static inline unsigned short get_fpu_cwd(struct fpu *fpu) static inline unsigned short get_fpu_swd(struct fpu *fpu) { - if (cpu_has_fxsr) { + if (boot_cpu_has(X86_FEATURE_FXSR)) { return fpu->state.fxsave.swd; } else { return (unsigned short)fpu->state.fsave.swd; diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 3a84275f012e..aacfd7a82cec 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -29,7 +29,7 @@ static void fpu__init_cpu_generic(void) unsigned long cr0; unsigned long cr4_mask = 0; - if (cpu_has_fxsr) + if (boot_cpu_has(X86_FEATURE_FXSR)) cr4_mask |= X86_CR4_OSFXSR; if (boot_cpu_has(X86_FEATURE_XMM)) cr4_mask |= X86_CR4_OSXMMEXCPT; @@ -106,7 +106,7 @@ static void __init fpu__init_system_mxcsr(void) { unsigned int mask = 0; - if (cpu_has_fxsr) { + if (boot_cpu_has(X86_FEATURE_FXSR)) { /* Static because GCC does not get 16-byte stack alignment right: */ static struct fxregs_state fxregs __initdata; @@ -221,7 +221,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); xstate_size 
= sizeof(struct swregs_state); } else { - if (cpu_has_fxsr) + if (boot_cpu_has(X86_FEATURE_FXSR)) xstate_size = sizeof(struct fxregs_state); else xstate_size = sizeof(struct fregs_state); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 8bd1c003942a..4cff7af735c5 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -21,7 +21,10 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r { struct fpu *target_fpu = &target->thread.fpu; - return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0; + if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active) + return regset->n; + else + return 0; } int xfpregs_get(struct task_struct *target, const struct user_regset *regset, @@ -30,7 +33,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, { struct fpu *fpu = &target->thread.fpu; - if (!cpu_has_fxsr) + if (!boot_cpu_has(X86_FEATURE_FXSR)) return -ENODEV; fpu__activate_fpstate_read(fpu); @@ -47,7 +50,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, struct fpu *fpu = &target->thread.fpu; int ret; - if (!cpu_has_fxsr) + if (!boot_cpu_has(X86_FEATURE_FXSR)) return -ENODEV; fpu__activate_fpstate_write(fpu); @@ -278,7 +281,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - if (!cpu_has_fxsr) + if (!boot_cpu_has(X86_FEATURE_FXSR)) return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpu->state.fsave, 0, -1); @@ -309,7 +312,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - if (!cpu_has_fxsr) + if (!boot_cpu_has(X86_FEATURE_FXSR)) return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpu->state.fsave, 0, -1); -- cgit v1.2.3 From d366bf7eb99d0644e47ecd52c184d7ad95df02f2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:25:02 +0200 Subject: x86/cpufeature: Replace cpu_has_xsave with boot_cpu_has() usage Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1459801503-15600-10-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 2 +- arch/x86/include/asm/cpufeature.h | 1 - arch/x86/kernel/fpu/regset.c | 8 ++++---- arch/x86/kernel/fpu/xstate.c | 8 ++++---- arch/x86/kernel/signal.c | 4 ++-- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/x86.c | 12 ++++++------ 7 files changed, 18 insertions(+), 19 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 0552884da18d..2f29f4e407c3 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -357,7 +357,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); /* Create the ucontext. 
*/ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); else put_user_ex(0, &frame->uc.uc_flags); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 526381a14546..732a00f12ac0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -118,7 +118,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) -#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) /* * Do not add any more of those clumsy macros - use static_cpu_has() for diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 4cff7af735c5..bc5e76c1d7c5 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -68,7 +68,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, * update the header bits in the xsave header, indicating the * presence of FP and SSE state. */ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; return ret; @@ -82,7 +82,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, struct xregs_state *xsave; int ret; - if (!cpu_has_xsave) + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; fpu__activate_fpstate_read(fpu); @@ -111,7 +111,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, struct xregs_state *xsave; int ret; - if (!cpu_has_xsave) + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; fpu__activate_fpstate_write(fpu); @@ -328,7 +328,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, * update the header bit in the xsave header, indicating the * presence of FP. */ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; return ret; } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b48ef35b28d4..18b9fd809fe7 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -190,7 +190,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) */ void fpu__init_cpu_xstate(void) { - if (!cpu_has_xsave || !xfeatures_mask) + if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) return; cr4_set_bits(X86_CR4_OSXSAVE); @@ -316,7 +316,7 @@ static void __init setup_init_fpu_buf(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - if (!cpu_has_xsave) + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return; setup_xstate_features(); @@ -630,7 +630,7 @@ void __init fpu__init_system_xstate(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - if (!cpu_has_xsave) { + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { pr_info("x86/fpu: Legacy x87 FPU detected.\n"); return; } @@ -678,7 +678,7 @@ void fpu__resume_cpu(void) /* * Restore XCR0 on xsave capable CPUs: */ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 548ddf7d6fd2..6408c09bbcd4 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -391,7 +391,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(&frame->uc, &frame->puc); /* Create the ucontext. 
*/ - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); else put_user_ex(0, &frame->uc.uc_flags); @@ -442,7 +442,7 @@ static unsigned long frame_uc_flags(struct pt_regs *regs) { unsigned long flags; - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; else flags = UC_SIGCONTEXT_SS; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8efb839948e5..a056b72c2f33 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -75,7 +75,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) return 0; /* Update OSXSAVE bit */ - if (cpu_has_xsave && best->function == 0x1) { + if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1) { best->ecx &= ~F(OSXSAVE); if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) best->ecx |= F(OSXSAVE); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 742d0f7d3556..4eb2fca335c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2612,7 +2612,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_MAX_MCE_BANKS; break; case KVM_CAP_XCRS: - r = cpu_has_xsave; + r = boot_cpu_has(X86_FEATURE_XSAVE); break; case KVM_CAP_TSC_CONTROL: r = kvm_has_tsc_control; @@ -3122,7 +3122,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - if (cpu_has_xsave) { + if (boot_cpu_has(X86_FEATURE_XSAVE)) { memset(guest_xsave, 0, sizeof(struct kvm_xsave)); fill_xsave((u8 *) guest_xsave->region, vcpu); } else { @@ -3140,7 +3140,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, u64 xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; - if (cpu_has_xsave) { + if (boot_cpu_has(X86_FEATURE_XSAVE)) { /* * Here we allow setting states that are not present in * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility @@ -3161,7 +3161,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, struct kvm_xcrs *guest_xcrs) { - if (!cpu_has_xsave) { + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { guest_xcrs->nr_xcrs = 0; return; } @@ -3177,7 +3177,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, { int i, r = 0; - if (!cpu_has_xsave) + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -EINVAL; if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags) @@ -5866,7 +5866,7 @@ int kvm_arch_init(void *opaque) perf_register_guest_info_callbacks(&kvm_guest_cbs); - if (cpu_has_xsave) + if (boot_cpu_has(X86_FEATURE_XSAVE)) host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); kvm_lapic_init(); -- cgit v1.2.3 From 782511b00f749cfebc0cb5d6ce960de5410c221d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 22:25:03 +0200 Subject: x86/cpufeature: Replace cpu_has_xsaves with boot_cpu_has() usage Signed-off-by: Borislav Petkov Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459801503-15600-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 6 ------ arch/x86/kernel/fpu/xstate.c | 10 +++++----- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 732a00f12ac0..07c942d84662 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -118,12 +118,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) -#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES) -/* - * Do not add any more of those clumsy macros - use static_cpu_has() for - * fast paths and boot_cpu_has() otherwise! - */ - #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 18b9fd809fe7..4ea2a59483c7 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -280,7 +280,7 @@ static void __init setup_xstate_comp(void) xstate_comp_offsets[0] = 0; xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); - if (!cpu_has_xsaves) { + if (!boot_cpu_has(X86_FEATURE_XSAVES)) { for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { if (xfeature_enabled(i)) { xstate_comp_offsets[i] = xstate_offsets[i]; @@ -322,7 +322,7 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); print_xstate_features(); - if (cpu_has_xsaves) { + if (boot_cpu_has(X86_FEATURE_XSAVES)) { init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; init_fpstate.xsave.header.xfeatures = xfeatures_mask; } @@ -417,7 +417,7 @@ static int xfeature_size(int xfeature_nr) */ static int using_compacted_format(void) { - return cpu_has_xsaves; + return boot_cpu_has(X86_FEATURE_XSAVES); } static void __xstate_dump_leaves(void) @@ -549,7 +549,7 @@ static unsigned int __init calculate_xstate_size(void) unsigned int eax, ebx, ecx, edx; unsigned int calculated_xstate_size; - if (!cpu_has_xsaves) { + if (!boot_cpu_has(X86_FEATURE_XSAVES)) { /* * - CPUID function 0DH, sub-function 0: * EBX enumerates the size (in bytes) required by @@ -667,7 +667,7 @@ void __init fpu__init_system_xstate(void) pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, xstate_size, - cpu_has_xsaves ? "compacted" : "standard"); + boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); } /* diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ee1c8a93871c..d5908bde9342 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3386,7 +3386,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) } } - if (cpu_has_xsaves) + if (boot_cpu_has(X86_FEATURE_XSAVES)) rdmsrl(MSR_IA32_XSS, host_xss); return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4eb2fca335c9..33102ded1398 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3095,7 +3095,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) /* Set XSTATE_BV and possibly XCOMP_BV. 
*/ xsave->header.xfeatures = xstate_bv; - if (cpu_has_xsaves) + if (boot_cpu_has(X86_FEATURE_XSAVES)) xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; /* @@ -7292,7 +7292,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) static void fx_init(struct kvm_vcpu *vcpu) { fpstate_init(&vcpu->arch.guest_fpu.state); - if (cpu_has_xsaves) + if (boot_cpu_has(X86_FEATURE_XSAVES)) vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; -- cgit v1.2.3 From 78df526c74a4db696e1e058b9869471937d0773b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2016 08:29:50 +0200 Subject: x86/fpu/regset: Replace static_cpu_has() usage with boot_cpu_has() fpregs_{g,s}et() are not sizzling-hot paths to justify the need for static_cpu_has(). Use the normal boot_cpu_has() helper. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459837795-2588-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/regset.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index bc5e76c1d7c5..81422dfb152b 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -278,7 +278,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, fpu__activate_fpstate_read(fpu); - if (!static_cpu_has(X86_FEATURE_FPU)) + if (!boot_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); if (!boot_cpu_has(X86_FEATURE_FXSR)) @@ -309,7 +309,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, fpu__activate_fpstate_write(fpu); fpstate_sanitize_xstate(fpu); - if (!static_cpu_has(X86_FEATURE_FPU)) + if (!boot_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); if (!boot_cpu_has(X86_FEATURE_FXSR)) -- cgit v1.2.3 From 425d8c2fc5e6dddbad083502bb77c7beae545620 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2016 08:29:51 +0200 Subject: x86/cpu: Simplify extended APIC ID detection on AMD Both if-branches are under if (boot_cpu_has(X86_FEATURE_APIC)), unify them. Also, simplify the test for bits: - 17 ("ApicExtBrdCst: APIC extended broadcast enable") and - 18 ("ApicExtId: APIC extended ID enable.") in "D18F0x68 Link Transaction Control." No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459837795-2588-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 54f7b44dcf01..c343a54bed39 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -565,14 +565,17 @@ static void early_init_amd(struct cpuinfo_x86 *c) * can safely set X86_FEATURE_EXTD_APICID unconditionally for families * after 16h. 
*/ - if (boot_cpu_has(X86_FEATURE_APIC) && c->x86 > 0x16) { - set_cpu_cap(c, X86_FEATURE_EXTD_APICID); - } else if (boot_cpu_has(X86_FEATURE_APIC) && c->x86 >= 0xf) { - /* check CPU config space for extended APIC ID */ - unsigned int val; - val = read_pci_config(0, 24, 0, 0x68); - if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) + if (boot_cpu_has(X86_FEATURE_APIC)) { + if (c->x86 > 0x16) set_cpu_cap(c, X86_FEATURE_EXTD_APICID); + else if (c->x86 >= 0xf) { + /* check CPU config space for extended APIC ID */ + unsigned int val; + + val = read_pci_config(0, 24, 0, 0x68); + if ((val >> 17 & 0x3) == 0x3) + set_cpu_cap(c, X86_FEATURE_EXTD_APICID); + } } #endif -- cgit v1.2.3 From a841cca74ea7612508aee161c89987b2646ed769 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2016 08:29:52 +0200 Subject: x86/tsc: Do not check X86_FEATURE_CONSTANT_TSC in notifier call ... because the notifier-registering routine already does that. Also, rename cpufreq_tsc() init call to something more telling. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459837795-2588-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index a0346bc51833..5bb702c77e8f 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -922,9 +922,6 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, struct cpufreq_freqs *freq = data; unsigned long *lpj; - if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; - lpj = &boot_cpu_data.loops_per_jiffy; #ifdef CONFIG_SMP if (!(freq->flags & CPUFREQ_CONST_LOOPS)) @@ -954,7 +951,7 @@ static struct notifier_block time_cpufreq_notifier_block = { .notifier_call = time_cpufreq_notifier }; -static int __init cpufreq_tsc(void) +static int __init cpufreq_register_tsc_scaling(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return 0; @@ -965,7 +962,7 @@ static int __init cpufreq_tsc(void) return 0; } -core_initcall(cpufreq_tsc); +core_initcall(cpufreq_register_tsc_scaling); #endif /* CONFIG_CPU_FREQ */ -- cgit v1.2.3 From eff4677e9fb9b680d1d5f6ba079116548d072b7e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2016 08:29:53 +0200 Subject: x86/tsc: Save an indentation level in recalibrate_cpu_khz() ... by flipping the check. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459837795-2588-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 5bb702c77e8f..38ba6de56ede 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -834,15 +834,15 @@ int recalibrate_cpu_khz(void) #ifndef CONFIG_SMP unsigned long cpu_khz_old = cpu_khz; - if (boot_cpu_has(X86_FEATURE_TSC)) { - tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; - cpu_data(0).loops_per_jiffy = - cpufreq_scale(cpu_data(0).loops_per_jiffy, - cpu_khz_old, cpu_khz); - return 0; - } else + if (!boot_cpu_has(X86_FEATURE_TSC)) return -ENODEV; + + tsc_khz = x86_platform.calibrate_tsc(); + cpu_khz = tsc_khz; + cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, + cpu_khz_old, cpu_khz); + + return 0; #else return -ENODEV; #endif -- cgit v1.2.3 From de82fbc3823b7b15ee03466ebfb1c5ec7cc1a941 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2016 08:29:54 +0200 Subject: x86/fpu: Remove check_fpu() indirection Rename it to fpu__init_check_bugs() and do the CPU feature check at entry, thus getting rid of the old fpu__init_check_bugs() wrapper. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459837795-2588-6-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/bugs.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index 224b5ec52195..aad34aafc0e0 100644 --- a/arch/x86/kernel/fpu/bugs.c +++ b/arch/x86/kernel/fpu/bugs.c @@ -21,11 +21,15 @@ static double __initdata y = 3145727.0; * We should really only care about bugs here * anyway. Not features. */ -static void __init check_fpu(void) +void __init fpu__init_check_bugs(void) { u32 cr0_saved; s32 fdiv_bug; + /* kernel_fpu_begin/end() relies on patched alternative instructions. */ + if (!boot_cpu_has(X86_FEATURE_FPU)) + return; + /* We might have CR0::TS set already, clear it: */ cr0_saved = read_cr0(); write_cr0(cr0_saved & ~X86_CR0_TS); @@ -59,13 +63,3 @@ static void __init check_fpu(void) pr_warn("Hmm, FPU with FDIV bug\n"); } } - -void __init fpu__init_check_bugs(void) -{ - /* - * kernel_fpu_begin/end() in check_fpu() relies on the patched - * alternative instructions. - */ - if (boot_cpu_has(X86_FEATURE_FPU)) - check_fpu(); -} -- cgit v1.2.3 From 6aa6dbfced51dec6cde159c6167ad3dad6add823 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2016 08:29:55 +0200 Subject: x86/fpu: Get rid of x87 math exception helpers ... and integrate their functionality into their single user fpu__exception_code(). No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459837795-2588-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 44 +++++++++++++------------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1551b28398a4..97027545a72d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -506,33 +506,6 @@ void fpu__clear(struct fpu *fpu) * x87 math exception handling: */ -static inline unsigned short get_fpu_cwd(struct fpu *fpu) -{ - if (boot_cpu_has(X86_FEATURE_FXSR)) { - return fpu->state.fxsave.cwd; - } else { - return (unsigned short)fpu->state.fsave.cwd; - } -} - -static inline unsigned short get_fpu_swd(struct fpu *fpu) -{ - if (boot_cpu_has(X86_FEATURE_FXSR)) { - return fpu->state.fxsave.swd; - } else { - return (unsigned short)fpu->state.fsave.swd; - } -} - -static inline unsigned short get_fpu_mxcsr(struct fpu *fpu) -{ - if (boot_cpu_has(X86_FEATURE_XMM)) { - return fpu->state.fxsave.mxcsr; - } else { - return MXCSR_DEFAULT; - } -} - int fpu__exception_code(struct fpu *fpu, int trap_nr) { int err; @@ -547,10 +520,15 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) * so if this combination doesn't produce any single exception, * then we have a bad program that isn't synchronizing its FPU usage * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception + * fully reproduce the context of the exception. */ - cwd = get_fpu_cwd(fpu); - swd = get_fpu_swd(fpu); + if (boot_cpu_has(X86_FEATURE_FXSR)) { + cwd = fpu->state.fxsave.cwd; + swd = fpu->state.fxsave.swd; + } else { + cwd = (unsigned short)fpu->state.fsave.cwd; + swd = (unsigned short)fpu->state.fsave.swd; + } err = swd & ~cwd; } else { @@ -560,7 +538,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) * unmasked exception was caught we must mask the exception mask bits * at 0x1f80, and then use these to mask the exception bits at 0x3f. */ - unsigned short mxcsr = get_fpu_mxcsr(fpu); + unsigned short mxcsr = MXCSR_DEFAULT; + + if (boot_cpu_has(X86_FEATURE_XMM)) + mxcsr = fpu->state.fxsave.mxcsr; + err = ~(mxcsr >> 7) & mxcsr; } -- cgit v1.2.3 From 7bbcdb1ca4d2fd69094ee89c18601b396531ca9f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:32 -0700 Subject: x86/head: Pass a real pt_regs and trapnr to early_fixup_exception() early_fixup_exception() is limited by the fact that it doesn't have a real struct pt_regs. Change both the 32-bit and 64-bit asm and the C code to pass and accept a real pt_regs. 
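To make the interface change easier to follow, here is a minimal C sketch of the reworked entry point, distilled from the uaccess.h and extable.c hunks of this patch. It is a summary, not an extra change: the lookup of new_ip and handler from the exception-table entry is elided, and no names are used beyond what those hunks already show.

/*
 * Sketch of the new early fixup entry point.  The old version took only
 * "unsigned long *ip", so it could patch nothing but the saved
 * instruction pointer; the asm stubs now build a real struct pt_regs on
 * the stack and pass the vector number alongside it.
 */
int __init early_fixup_exception(struct pt_regs *regs, int trapnr)
{
	const struct exception_table_entry *e;
	unsigned long new_ip;
	ex_handler_t handler;

	e = search_exception_tables(regs->ip);
	if (!e)
		return 0;			/* not a known faulting insn */

	/* ... new_ip and handler are taken from the table entry ... */

	if (handler != ex_handler_default)
		return 0;			/* only the default handler this early */

	regs->ip = new_ip;			/* resume at the fixup address */
	return 1;
}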
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/e3fb680fcfd5e23e38237e8328b64a25cc121d37.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 2 +- arch/x86/kernel/head_32.S | 74 +++++++++++++++++++++++++++++------------- arch/x86/kernel/head_64.S | 68 ++++++++++++++++++++------------------ arch/x86/mm/extable.c | 6 ++-- 4 files changed, 92 insertions(+), 58 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a969ae607be8..b6fb311b7d75 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -110,7 +110,7 @@ struct exception_table_entry { extern int fixup_exception(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); -extern int early_fixup_exception(unsigned long *ip); +extern int early_fixup_exception(struct pt_regs *regs, int trapnr); /* * These are the main single-value transfer routines. They automatically diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 54cdbd2003fe..0904536cd45c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -568,29 +568,64 @@ early_idt_handler_common: je hlt_loop incl %ss:early_recursion_flag - push %eax # 16(%esp) - push %ecx # 12(%esp) - push %edx # 8(%esp) - push %ds # 4(%esp) - push %es # 0(%esp) - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es + /* The vector number is in pt_regs->gs */ - cmpl $(__KERNEL_CS),32(%esp) + cld + pushl %fs /* pt_regs->fs */ + movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ + pushl %es /* pt_regs->es */ + movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ + pushl %ds /* pt_regs->ds */ + movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */ + pushl %eax /* pt_regs->ax */ + pushl %ebp /* pt_regs->bp */ + pushl %edi /* pt_regs->di */ + pushl %esi /* pt_regs->si */ + pushl %edx /* pt_regs->dx */ + pushl %ecx /* pt_regs->cx */ + pushl %ebx /* pt_regs->bx */ + + /* Fix up DS and ES */ + movl $(__KERNEL_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + + /* Load the vector number into EDX */ + movl PT_GS(%esp), %edx + + /* Load GS into pt_regs->gs and clear high bits */ + movw %gs, PT_GS(%esp) + movw $0, PT_GS+2(%esp) + + cmpl $(__KERNEL_CS),PT_CS(%esp) jne 10f - leal 28(%esp),%eax # Pointer to %eip - call early_fixup_exception - andl %eax,%eax - jnz ex_entry /* found an exception entry */ + movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */ + call early_fixup_exception + andl %eax,%eax + jz 10f /* Exception wasn't fixed up */ + + popl %ebx /* pt_regs->bx */ + popl %ecx /* pt_regs->cx */ + popl %edx /* pt_regs->dx */ + popl %esi /* pt_regs->si */ + popl %edi /* pt_regs->di */ + popl %ebp /* pt_regs->bp */ + popl %eax /* pt_regs->ax */ + popl %ds /* pt_regs->ds */ + popl %es /* pt_regs->es */ + popl %fs /* pt_regs->fs */ + popl %gs /* pt_regs->gs */ + decl %ss:early_recursion_flag + addl $4, %esp /* pop pt_regs->orig_ax */ + iret 10: #ifdef CONFIG_PRINTK xorl %eax,%eax - movw %ax,2(%esp) /* clean up the segment values on some cpus */ - movw %ax,6(%esp) - movw %ax,34(%esp) + movw %ax,PT_FS+2(%esp) /* clean up the segment values on some cpus */ + movw %ax,PT_DS+2(%esp) + movw %ax,PT_ES+2(%esp) leal 40(%esp),%eax pushl %eax /* %esp before the exception */ pushl %ebx @@ -608,13 +643,6 @@ 
hlt_loop: hlt jmp hlt_loop -ex_entry: - pop %es - pop %ds - pop %edx - pop %ecx - pop %eax - decl %ss:early_recursion_flag .Lis_nmi: addl $8,%esp /* drop vector number and error code */ iret diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 22fbf9df61bb..9e8636d2cedd 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -20,6 +20,7 @@ #include #include #include +#include "../entry/calling.h" #ifdef CONFIG_PARAVIRT #include @@ -357,39 +358,52 @@ early_idt_handler_common: jz 1f incl early_recursion_flag(%rip) - pushq %rax # 64(%rsp) - pushq %rcx # 56(%rsp) - pushq %rdx # 48(%rsp) - pushq %rsi # 40(%rsp) - pushq %rdi # 32(%rsp) - pushq %r8 # 24(%rsp) - pushq %r9 # 16(%rsp) - pushq %r10 # 8(%rsp) - pushq %r11 # 0(%rsp) - - cmpl $__KERNEL_CS,96(%rsp) + /* The vector number is currently in the pt_regs->di slot. */ + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* RSI = vector number */ + movq %rdi, 8(%rsp) /* pt_regs->di = RDI */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->bx */ + pushq %rbp /* pt_regs->bp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + + cmpl $__KERNEL_CS,CS(%rsp) jne 11f - cmpl $14,72(%rsp) # Page fault? + cmpq $14,%rsi /* Page fault? */ jnz 10f - GET_CR2_INTO(%rdi) # can clobber any volatile register if pv + GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ call early_make_pgtable andl %eax,%eax - jz 20f # All good + jz 20f /* All good */ 10: - leaq 88(%rsp),%rdi # Pointer to %rip + movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */ call early_fixup_exception andl %eax,%eax jnz 20f # Found an exception entry 11: #ifdef CONFIG_EARLY_PRINTK - GET_CR2_INTO(%r9) # can clobber any volatile register if pv - movl 80(%rsp),%r8d # error code - movl 72(%rsp),%esi # vector number - movl 96(%rsp),%edx # %cs - movq 88(%rsp),%rcx # %rip + /* + * On paravirt kernels, GET_CR2_INTO clobbers callee-clobbered regs. + * We only care about RSI, so we need to save it. 
+ */ + movq %rsi,%rbx /* Save vector number */ + GET_CR2_INTO(%r9) + movq ORIG_RAX(%rsp),%r8 /* error code */ + movq %rbx,%rsi /* vector number */ + movq CS(%rsp),%rdx + movq RIP(%rsp),%rcx xorl %eax,%eax leaq early_idt_msg(%rip),%rdi call early_printk @@ -398,24 +412,16 @@ early_idt_handler_common: call dump_stack #ifdef CONFIG_KALLSYMS leaq early_idt_ripmsg(%rip),%rdi - movq 40(%rsp),%rsi # %rip again + movq RIP(%rsp),%rsi # %rip again call __print_symbol #endif #endif /* EARLY_PRINTK */ 1: hlt jmp 1b -20: # Exception table entry found or page table generated - popq %r11 - popq %r10 - popq %r9 - popq %r8 - popq %rdi - popq %rsi - popq %rdx - popq %rcx - popq %rax +20: /* Exception table entry found or page table generated */ decl early_recursion_flag(%rip) + jmp restore_regs_and_iret .Lis_nmi: addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 82447b3fba38..1366e067a796 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -83,13 +83,13 @@ int fixup_exception(struct pt_regs *regs, int trapnr) } /* Restricted version used during very early boot */ -int __init early_fixup_exception(unsigned long *ip) +int __init early_fixup_exception(struct pt_regs *regs, int trapnr) { const struct exception_table_entry *e; unsigned long new_ip; ex_handler_t handler; - e = search_exception_tables(*ip); + e = search_exception_tables(regs->ip); if (!e) return 0; @@ -100,6 +100,6 @@ int __init early_fixup_exception(unsigned long *ip) if (handler != ex_handler_default) return 0; - *ip = new_ip; + regs->ip = new_ip; return 1; } -- cgit v1.2.3 From 0d0efc07f3df677d7622bb760f8e2920b5e33f42 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:33 -0700 Subject: x86/head: Move the early NMI fixup into C C is nicer than asm. 
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/dd068269f8d59fe44e9e43a50d0efd67da65c2b5.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 7 ------- arch/x86/kernel/head_64.S | 6 ------ arch/x86/mm/extable.c | 5 +++++ 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 0904536cd45c..184291c72c22 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -561,9 +561,6 @@ early_idt_handler_common: */ cld - cmpl $2,(%esp) # X86_TRAP_NMI - je .Lis_nmi # Ignore NMI - cmpl $2,%ss:early_recursion_flag je hlt_loop incl %ss:early_recursion_flag @@ -642,10 +639,6 @@ early_idt_handler_common: hlt_loop: hlt jmp hlt_loop - -.Lis_nmi: - addl $8,%esp /* drop vector number and error code */ - iret ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9e8636d2cedd..230843781dd4 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -351,9 +351,6 @@ early_idt_handler_common: */ cld - cmpl $2,(%rsp) # X86_TRAP_NMI - je .Lis_nmi # Ignore NMI - cmpl $2,early_recursion_flag(%rip) jz 1f incl early_recursion_flag(%rip) @@ -422,9 +419,6 @@ early_idt_handler_common: 20: /* Exception table entry found or page table generated */ decl early_recursion_flag(%rip) jmp restore_regs_and_iret -.Lis_nmi: - addq $16,%rsp # drop vector number and error code - INTERRUPT_RETURN ENDPROC(early_idt_handler_common) __INITDATA diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 1366e067a796..4be041910c2f 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,5 +1,6 @@ #include #include +#include typedef bool (*ex_handler_t)(const struct exception_table_entry *, struct pt_regs *, int); @@ -89,6 +90,10 @@ int __init early_fixup_exception(struct pt_regs *regs, int trapnr) unsigned long new_ip; ex_handler_t handler; + /* Ignore early NMIs. */ + if (trapnr == X86_TRAP_NMI) + return 1; + e = search_exception_tables(regs->ip); if (!e) return 0; -- cgit v1.2.3 From 0e861fbb5bda79b871341ef2a9a8059765cbe8a4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:34 -0700 Subject: x86/head: Move early exception panic code into early_fixup_exception() This removes a bunch of assembly and adds some C code instead. It changes the actual printouts on both 32-bit and 64-bit kernels, but they still seem okay. 
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/4085070316fc3ab29538d3fcfe282648d1d4ee2e.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 2 +- arch/x86/kernel/head_32.S | 49 +++++------------------------------------- arch/x86/kernel/head_64.S | 45 ++------------------------------------ arch/x86/mm/extable.c | 29 ++++++++++++++++++++----- 4 files changed, 32 insertions(+), 93 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b6fb311b7d75..d794fd1f582f 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -110,7 +110,7 @@ struct exception_table_entry { extern int fixup_exception(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); -extern int early_fixup_exception(struct pt_regs *regs, int trapnr); +extern void early_fixup_exception(struct pt_regs *regs, int trapnr); /* * These are the main single-value transfer routines. They automatically diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 184291c72c22..6770865fde6b 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -561,8 +561,6 @@ early_idt_handler_common: */ cld - cmpl $2,%ss:early_recursion_flag - je hlt_loop incl %ss:early_recursion_flag /* The vector number is in pt_regs->gs */ @@ -594,13 +592,8 @@ early_idt_handler_common: movw %gs, PT_GS(%esp) movw $0, PT_GS+2(%esp) - cmpl $(__KERNEL_CS),PT_CS(%esp) - jne 10f - movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */ call early_fixup_exception - andl %eax,%eax - jz 10f /* Exception wasn't fixed up */ popl %ebx /* pt_regs->bx */ popl %ecx /* pt_regs->cx */ @@ -616,29 +609,6 @@ early_idt_handler_common: decl %ss:early_recursion_flag addl $4, %esp /* pop pt_regs->orig_ax */ iret - -10: -#ifdef CONFIG_PRINTK - xorl %eax,%eax - movw %ax,PT_FS+2(%esp) /* clean up the segment values on some cpus */ - movw %ax,PT_DS+2(%esp) - movw %ax,PT_ES+2(%esp) - leal 40(%esp),%eax - pushl %eax /* %esp before the exception */ - pushl %ebx - pushl %ebp - pushl %esi - pushl %edi - movl %cr2,%eax - pushl %eax - pushl (20+6*4)(%esp) /* trapno */ - pushl $fault_msg - call printk -#endif - call dump_stack -hlt_loop: - hlt - jmp hlt_loop ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ @@ -674,10 +644,14 @@ ignore_int: popl %eax #endif iret + +hlt_loop: + hlt + jmp hlt_loop ENDPROC(ignore_int) __INITDATA .align 4 -early_recursion_flag: +GLOBAL(early_recursion_flag) .long 0 __REFDATA @@ -742,19 +716,6 @@ __INITRODATA int_msg: .asciz "Unknown interrupt or fault at: %p %p %p\n" -fault_msg: -/* fault info: */ - .ascii "BUG: Int %d: CR2 %p\n" -/* regs pushed in early_idt_handler: */ - .ascii " EDI %p ESI %p EBP %p EBX %p\n" - .ascii " ESP %p ES %p DS %p\n" - .ascii " EDX %p ECX %p EAX %p\n" -/* fault frame: */ - .ascii " vec %p err %p EIP %p CS %p flg %p\n" - .ascii "Stack: %p %p %p %p %p %p %p %p\n" - .ascii " %p %p %p %p %p %p %p %p\n" - .asciz " %p %p %p %p %p %p %p %p\n" - #include "../../x86/xen/xen-head.S" /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 230843781dd4..3de91a7e6c99 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -351,8 +351,6 @@ early_idt_handler_common: */ cld - cmpl $2,early_recursion_flag(%rip) - jz 
1f incl early_recursion_flag(%rip) /* The vector number is currently in the pt_regs->di slot. */ @@ -373,9 +371,6 @@ early_idt_handler_common: pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ - cmpl $__KERNEL_CS,CS(%rsp) - jne 11f - cmpq $14,%rsi /* Page fault? */ jnz 10f GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ @@ -386,37 +381,8 @@ early_idt_handler_common: 10: movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */ call early_fixup_exception - andl %eax,%eax - jnz 20f # Found an exception entry - -11: -#ifdef CONFIG_EARLY_PRINTK - /* - * On paravirt kernels, GET_CR2_INTO clobbers callee-clobbered regs. - * We only care about RSI, so we need to save it. - */ - movq %rsi,%rbx /* Save vector number */ - GET_CR2_INTO(%r9) - movq ORIG_RAX(%rsp),%r8 /* error code */ - movq %rbx,%rsi /* vector number */ - movq CS(%rsp),%rdx - movq RIP(%rsp),%rcx - xorl %eax,%eax - leaq early_idt_msg(%rip),%rdi - call early_printk - cmpl $2,early_recursion_flag(%rip) - jz 1f - call dump_stack -#ifdef CONFIG_KALLSYMS - leaq early_idt_ripmsg(%rip),%rdi - movq RIP(%rsp),%rsi # %rip again - call __print_symbol -#endif -#endif /* EARLY_PRINTK */ -1: hlt - jmp 1b -20: /* Exception table entry found or page table generated */ +20: decl early_recursion_flag(%rip) jmp restore_regs_and_iret ENDPROC(early_idt_handler_common) @@ -424,16 +390,9 @@ ENDPROC(early_idt_handler_common) __INITDATA .balign 4 -early_recursion_flag: +GLOBAL(early_recursion_flag) .long 0 -#ifdef CONFIG_EARLY_PRINTK -early_idt_msg: - .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" -early_idt_ripmsg: - .asciz "RIP %s\n" -#endif /* CONFIG_EARLY_PRINTK */ - #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ GLOBAL(name) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4be041910c2f..da442f37ca8b 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -83,8 +83,10 @@ int fixup_exception(struct pt_regs *regs, int trapnr) return handler(e, regs, trapnr); } +extern unsigned int early_recursion_flag; + /* Restricted version used during very early boot */ -int __init early_fixup_exception(struct pt_regs *regs, int trapnr) +void __init early_fixup_exception(struct pt_regs *regs, int trapnr) { const struct exception_table_entry *e; unsigned long new_ip; @@ -92,19 +94,36 @@ int __init early_fixup_exception(struct pt_regs *regs, int trapnr) /* Ignore early NMIs. */ if (trapnr == X86_TRAP_NMI) - return 1; + return; + + if (early_recursion_flag > 2) + goto halt_loop; + + if (regs->cs != __KERNEL_CS) + goto fail; e = search_exception_tables(regs->ip); if (!e) - return 0; + goto fail; new_ip = ex_fixup_addr(e); handler = ex_fixup_handler(e); /* special handling not supported during early boot */ if (handler != ex_handler_default) - return 0; + goto fail; regs->ip = new_ip; - return 1; + return; + +fail: + early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", + (unsigned)trapnr, (unsigned long)regs->cs, regs->ip, + regs->orig_ax, read_cr2()); + + show_regs(regs); + +halt_loop: + while (true) + halt(); } -- cgit v1.2.3 From ae7ef45e12354a1e2f6013b46df0c9f5bbb6ffbe Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:35 -0700 Subject: x86/traps: Enable all exception handler callbacks early Now that early_fixup_exception() has pt_regs, we can just call fixup_exception() from it. This will make fancy exception handlers work early. 
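To make "fancy" concrete: any handler with the ex_handler_t signature attached via _ASM_EXTABLE_HANDLE() can now fire before trap_init() as well. A minimal sketch of such a handler (the name is made up; ex_fixup_addr() is local to arch/x86/mm/extable.c at this point, so a real handler of this kind would live there):

	static bool ex_handler_zero_ax(const struct exception_table_entry *fixup,
				       struct pt_regs *regs, int trapnr)
	{
		regs->ip = ex_fixup_addr(fixup);	/* resume at the fixup target */
		regs->ax = 0;				/* hand back a deterministic value */
		return true;				/* exception handled */
	}
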
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/20fc047d926150cb08cb9b9f2923519b07ec1a15.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/extable.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index da442f37ca8b..061a23758354 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -88,10 +88,6 @@ extern unsigned int early_recursion_flag; /* Restricted version used during very early boot */ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) { - const struct exception_table_entry *e; - unsigned long new_ip; - ex_handler_t handler; - /* Ignore early NMIs. */ if (trapnr == X86_TRAP_NMI) return; @@ -102,19 +98,8 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (regs->cs != __KERNEL_CS) goto fail; - e = search_exception_tables(regs->ip); - if (!e) - goto fail; - - new_ip = ex_fixup_addr(e); - handler = ex_fixup_handler(e); - - /* special handling not supported during early boot */ - if (handler != ex_handler_default) - goto fail; - - regs->ip = new_ip; - return; + if (fixup_exception(regs, trapnr)) + return; fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", -- cgit v1.2.3 From c2ee03b2a94d7ba692cf6206bbe069d5bfcc20ed Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:36 -0700 Subject: x86/paravirt: Add _safe to the read_ms()r and write_msr() PV callbacks These callbacks match the _safe variants, so name them accordingly. This will make room for unsafe PV callbacks. Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/9ee3fb6a196a514c93325bdfa15594beecf04876.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 33 +++++++++++++++++---------------- arch/x86/include/asm/paravirt_types.h | 8 ++++---- arch/x86/kernel/paravirt.c | 4 ++-- arch/x86/xen/enlighten.c | 4 ++-- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 601f1b8f9961..81ef2d5c2a24 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -130,34 +130,35 @@ static inline void wbinvd(void) #define get_kernel_rpl() (pv_info.kernel_rpl) -static inline u64 paravirt_read_msr(unsigned msr, int *err) +static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) { - return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); + return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err); } -static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) +static inline int paravirt_write_msr_safe(unsigned msr, + unsigned low, unsigned high) { - return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); + return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high); } /* These should all do BUG_ON(_err), but our headers are too tangled. 
*/ #define rdmsr(msr, val1, val2) \ do { \ int _err; \ - u64 _l = paravirt_read_msr(msr, &_err); \ + u64 _l = paravirt_read_msr_safe(msr, &_err); \ val1 = (u32)_l; \ val2 = _l >> 32; \ } while (0) #define wrmsr(msr, val1, val2) \ do { \ - paravirt_write_msr(msr, val1, val2); \ + paravirt_write_msr_safe(msr, val1, val2); \ } while (0) #define rdmsrl(msr, val) \ do { \ int _err; \ - val = paravirt_read_msr(msr, &_err); \ + val = paravirt_read_msr_safe(msr, &_err); \ } while (0) static inline void wrmsrl(unsigned msr, u64 val) @@ -165,23 +166,23 @@ static inline void wrmsrl(unsigned msr, u64 val) wrmsr(msr, (u32)val, (u32)(val>>32)); } -#define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b) +#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b) /* rdmsr with exception handling */ -#define rdmsr_safe(msr, a, b) \ -({ \ - int _err; \ - u64 _l = paravirt_read_msr(msr, &_err); \ - (*a) = (u32)_l; \ - (*b) = _l >> 32; \ - _err; \ +#define rdmsr_safe(msr, a, b) \ +({ \ + int _err; \ + u64 _l = paravirt_read_msr_safe(msr, &_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ + _err; \ }) static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) { int err; - *p = paravirt_read_msr(msr, &err); + *p = paravirt_read_msr_safe(msr, &err); return err; } diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index e8c2326478c8..09c9e1dd81ce 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -155,10 +155,10 @@ struct pv_cpu_ops { void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); - /* MSR, PMC and TSR operations. - err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ - u64 (*read_msr)(unsigned int msr, int *err); - int (*write_msr)(unsigned int msr, unsigned low, unsigned high); + /* MSR operations. + err = 0/-EIO. wrmsr returns 0/-EIO. */ + u64 (*read_msr_safe)(unsigned int msr, int *err); + int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high); u64 (*read_pmc)(int counter); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f08ac28b8136..8aad95478ae5 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -339,8 +339,8 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .write_cr8 = native_write_cr8, #endif .wbinvd = native_wbinvd, - .read_msr = native_read_msr_safe, - .write_msr = native_write_msr_safe, + .read_msr_safe = native_read_msr_safe, + .write_msr_safe = native_write_msr_safe, .read_pmc = native_read_pmc, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9b8f1eacc110..13f756fdcb33 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1222,8 +1222,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .wbinvd = native_wbinvd, - .read_msr = xen_read_msr_safe, - .write_msr = xen_write_msr_safe, + .read_msr_safe = xen_read_msr_safe, + .write_msr_safe = xen_write_msr_safe, .read_pmc = xen_read_pmc, -- cgit v1.2.3 From fbd704374d111bed16a19261176fa30e2379c87c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:37 -0700 Subject: x86/msr: Carry on after a non-"safe" MSR access fails This demotes an OOPS and likely panic due to a failed non-"safe" MSR access to a WARN_ONCE() and, for RDMSR, a return value of zero. To be clear, this type of failure should *not* happen. 
This patch exists to minimize the chance of nasty undebuggable failures happening when a CONFIG_PARAVIRT=y bug in the non-"safe" MSR helpers gets fixed. Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/26567b216aae70e795938f4b567eace5a0eb90ba.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 10 ++++++++-- arch/x86/mm/extable.c | 27 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7a79ee2778b3..25f169c6eb95 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -84,7 +84,10 @@ static inline unsigned long long native_read_msr(unsigned int msr) { DECLARE_ARGS(val, low, high); - asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr)); + asm volatile("1: rdmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) + : EAX_EDX_RET(val, low, high) : "c" (msr)); if (msr_tracepoint_active(__tracepoint_read_msr)) do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); @@ -111,7 +114,10 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, static inline void native_write_msr(unsigned int msr, unsigned low, unsigned high) { - asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); + asm volatile("1: wrmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) + : : "c" (msr), "a"(low), "d" (high) : "memory"); if (msr_tracepoint_active(__tracepoint_read_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 061a23758354..fd9eb98c4f58 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -43,6 +43,33 @@ bool ex_handler_ext(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_ext); +bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n", + (unsigned int)regs->cx); + + /* Pretend that the read succeeded and returned 0. */ + regs->ip = ex_fixup_addr(fixup); + regs->ax = 0; + regs->dx = 0; + return true; +} +EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); + +bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n", + (unsigned int)regs->cx, + (unsigned int)regs->dx, (unsigned int)regs->ax); + + /* Pretend that the write succeeded. */ + regs->ip = ex_fixup_addr(fixup); + return true; +} +EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); + bool ex_has_fault_handler(unsigned long ip) { const struct exception_table_entry *e; -- cgit v1.2.3 From dd2f4a004b016bbfb64f1de49cb45e66232e40a6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:38 -0700 Subject: x86/paravirt: Add paravirt_{read,write}_msr() This adds paravirt callbacks for unsafe MSR access. On native, they call native_{read,write}_msr(). On Xen, they use xen_{read,write}_msr_safe(). Nothing uses them yet for ease of bisection. The next patch will use them in rdmsrl(), wrmsrl(), etc. I intentionally didn't make them warn on #GP on Xen. I think that should be done separately by the Xen maintainers. 
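For context, a small usage sketch of the two flavours the series ends up distinguishing; nothing below is added by this patch, and rdmsrl()/wrmsrl() only start using the new callbacks in the next patch:

	#include <asm/msr.h>

	static void msr_usage_sketch(void)
	{
		u64 val;
		int err;

		rdmsrl(MSR_EFER, val);			/* "unsafe": a #GP warns/oopses     */
		err = rdmsrl_safe(MSR_EFER, &val);	/* "safe": a #GP comes back as -EIO */
		(void)err;
	}
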
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/880eebc5dcd2ad9f310d41345f82061ea500e9fa.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 5 +++-- arch/x86/include/asm/paravirt.h | 11 +++++++++++ arch/x86/include/asm/paravirt_types.h | 10 ++++++++-- arch/x86/kernel/paravirt.c | 2 ++ arch/x86/xen/enlighten.c | 23 +++++++++++++++++++++++ 5 files changed, 47 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 25f169c6eb95..00050c034a13 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -111,8 +111,9 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, return EAX_EDX_VAL(val, low, high); } -static inline void native_write_msr(unsigned int msr, - unsigned low, unsigned high) +/* Can be uninlined because referenced by paravirt */ +notrace static inline void native_write_msr(unsigned int msr, + unsigned low, unsigned high) { asm volatile("1: wrmsr\n" "2:\n" diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 81ef2d5c2a24..97839fa8b8aa 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -130,6 +130,17 @@ static inline void wbinvd(void) #define get_kernel_rpl() (pv_info.kernel_rpl) +static inline u64 paravirt_read_msr(unsigned msr) +{ + return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr); +} + +static inline void paravirt_write_msr(unsigned msr, + unsigned low, unsigned high) +{ + return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); +} + static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) { return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 09c9e1dd81ce..b4a23eafa1b9 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -155,8 +155,14 @@ struct pv_cpu_ops { void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); - /* MSR operations. - err = 0/-EIO. wrmsr returns 0/-EIO. */ + /* Unsafe MSR operations. These will warn or panic on failure. */ + u64 (*read_msr)(unsigned int msr); + void (*write_msr)(unsigned int msr, unsigned low, unsigned high); + + /* + * Safe MSR operations. + * read sets err to 0 or -EIO. write returns 0 or -EIO. + */ u64 (*read_msr_safe)(unsigned int msr, int *err); int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8aad95478ae5..f9583917c7c4 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -339,6 +339,8 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .write_cr8 = native_write_cr8, #endif .wbinvd = native_wbinvd, + .read_msr = native_read_msr, + .write_msr = native_write_msr, .read_msr_safe = native_read_msr_safe, .write_msr_safe = native_write_msr_safe, .read_pmc = native_read_pmc, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 13f756fdcb33..6ab672233ac9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1092,6 +1092,26 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } +static u64 xen_read_msr(unsigned int msr) +{ + /* + * This will silently swallow a #GP from RDMSR. 
It may be worth + * changing that. + */ + int err; + + return xen_read_msr_safe(msr, &err); +} + +static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) +{ + /* + * This will silently swallow a #GP from WRMSR. It may be worth + * changing that. + */ + xen_write_msr_safe(msr, low, high); +} + void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1222,6 +1242,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .wbinvd = native_wbinvd, + .read_msr = xen_read_msr, + .write_msr = xen_write_msr, + .read_msr_safe = xen_read_msr_safe, .write_msr_safe = xen_write_msr_safe, -- cgit v1.2.3 From 4985ce15a397e9b6541548efe3b9ffac2dda9127 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:39 -0700 Subject: x86/paravirt: Make "unsafe" MSR accesses unsafe even if PARAVIRT=y Enabling CONFIG_PARAVIRT had an unintended side effect: rdmsr() turned into rdmsr_safe() and wrmsr() turned into wrmsr_safe(), even on bare metal. Undo that by using the new unsafe paravirt MSR callbacks. Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/414fabd6d3527703077c6c2a797223d0a9c3b081.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 97839fa8b8aa..3c731413f1de 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -152,24 +152,21 @@ static inline int paravirt_write_msr_safe(unsigned msr, return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high); } -/* These should all do BUG_ON(_err), but our headers are too tangled. */ #define rdmsr(msr, val1, val2) \ do { \ - int _err; \ - u64 _l = paravirt_read_msr_safe(msr, &_err); \ + u64 _l = paravirt_read_msr(msr); \ val1 = (u32)_l; \ val2 = _l >> 32; \ } while (0) #define wrmsr(msr, val1, val2) \ do { \ - paravirt_write_msr_safe(msr, val1, val2); \ + paravirt_write_msr(msr, val1, val2); \ } while (0) #define rdmsrl(msr, val) \ do { \ - int _err; \ - val = paravirt_read_msr_safe(msr, &_err); \ + val = paravirt_read_msr(msr); \ } while (0) static inline void wrmsrl(unsigned msr, u64 val) -- cgit v1.2.3 From b828b79fcced0e66492590707649dbfaea6435e6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 2 Apr 2016 07:01:40 -0700 Subject: x86/msr: Set the return value to zero when native_rdmsr_safe() fails This will cause unchecked native_rdmsr_safe() failures to return deterministic results. 
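In other words (illustrative sketch; the MSR number is made up and assumed not to exist):

	static int probe_bogus_msr(void)
	{
		u64 val = 0x1234;			 /* stale value              */
		int err = rdmsrl_safe(0x12345678, &val); /* hypothetical MSR, faults */

		/* Before this patch: err == -EIO, val holds whatever EDX:EAX had. */
		/* After this patch:  err == -EIO and val == 0.                    */
		return err;
	}
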
Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/515fb611449a755312a476cfe11675906e7ddf6c.1459605520.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 00050c034a13..7dc1d8fef7fd 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -101,7 +101,10 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, asm volatile("2: rdmsr ; xor %[err],%[err]\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err] ; jmp 1b\n\t" + "3: mov %[fault],%[err]\n\t" + "xorl %%eax, %%eax\n\t" + "xorl %%edx, %%edx\n\t" + "jmp 1b\n\t" ".previous\n\t" _ASM_EXTABLE(2b, 3b) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) -- cgit v1.2.3 From 60a0e2039e3df6c0a2b896bd78af36ff36fb629c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Apr 2016 08:46:22 -0700 Subject: x86/extable: Add a comment about early exception handlers Borislav asked for a comment explaining why all exception handlers are allowed early. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arjan van de Ven Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: KVM list Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/5f1dcd6919f4a5923959a8065cb2c04d9dac1412.1459784772.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/extable.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index fd9eb98c4f58..aaeda3ffaafe 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -125,6 +125,20 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (regs->cs != __KERNEL_CS) goto fail; + /* + * The full exception fixup machinery is available as soon as + * the early IDT is loaded. This means that it is the + * responsibility of extable users to either function correctly + * when handlers are invoked early or to simply avoid causing + * exceptions before they're ready to handle them. + * + * This is better than filtering which handlers can be used, + * because refusing to call a handler here is guaranteed to + * result in a hard-to-debug panic. + * + * Keep in mind that not all vectors actually get here. Early + * fage faults, for example, are special. + */ if (fixup_exception(regs, trapnr)) return; -- cgit v1.2.3 From 91ed140d6c1e168b11bbbddac4f6066f40a0c6b5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Mar 2016 16:21:02 +0200 Subject: x86/asm: Make sure verify_cpu() has a good stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 04633df0c43d ("x86/cpu: Call verify_cpu() after having entered long mode too") added the call to verify_cpu() for sanitizing CPU configuration. The latter uses the stack minimally and it can happen that we land in startup_64() directly from a 64-bit bootloader. Then we want to use our own, known good stack. Do that. APs don't need this as the trampoline sets up a stack for them. 
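The new bounds are ordinary linker-script symbols, so they could also be referenced from C; a hedged sketch (the patch itself only uses __end_init_task from head_64.S):

	extern char __start_init_task[], __end_init_task[];

	static unsigned long verify_cpu_stack_top(void)
	{
		/* What head_64.S loads into RSP: top of the init task's stack, minus 8. */
		return (unsigned long)__end_init_task - 8;
	}
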
Reported-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: Brian Gerst Cc: Linus Torvalds Cc: Mika Penttilä Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459434062-31055-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 8 ++++++++ include/asm-generic/vmlinux.lds.h | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3de91a7e6c99..5df831ef1442 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -65,6 +65,14 @@ startup_64: * tables and then reload them. */ + /* + * Setup stack for verify_cpu(). "-8" because stack_start is defined + * this way, see below. Our best guess is a NULL ptr for stack + * termination heuristics and we don't want to break anything which + * might depend on it (kgdb, ...). + */ + leaq (__end_init_task - 8)(%rip), %rsp + /* Sanitize CPU configuration */ call verify_cpu diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 339125bb4d2c..6a67ab94b553 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -245,7 +245,9 @@ #define INIT_TASK_DATA(align) \ . = ALIGN(align); \ - *(.data..init_task) + VMLINUX_SYMBOL(__start_init_task) = .; \ + *(.data..init_task) \ + VMLINUX_SYMBOL(__end_init_task) = .; /* * Read only Data -- cgit v1.2.3 From abfb9498ee1327f534df92a7ecaea81a85913bae Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 18 Apr 2016 16:43:43 +0300 Subject: x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall() The is_ia32_task()/is_x32_task() function names are a big misnomer: they suggests that the compat-ness of a system call is a task property, which is not true, the compatness of a system call purely depends on how it was invoked through the system call layer. A task may call 32-bit and 64-bit and x32 system calls without changing any of its kernel visible state. This specific minomer is also actively dangerous, as it might cause kernel developers to use the wrong kind of security checks within system calls. So rename it to in_{ia32,x32}_syscall(). Suggested-by: Andy Lutomirski Suggested-by: Ingo Molnar Signed-off-by: Dmitry Safonov [ Expanded the changelog. ] Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/1460987025-30360-1-git-send-email-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 2 +- arch/x86/include/asm/compat.h | 4 ++-- arch/x86/include/asm/thread_info.h | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/signal.c | 2 +- arch/x86/kernel/uprobes.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index e79d93d44ecd..ec138e538c44 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, long syscall_trace_enter(struct pt_regs *regs) { - u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + u32 arch = in_ia32_syscall() ? 
AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); if (phase1_result == 0) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index ebb102e1bbc7..5a3b2c119ed0 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *)round_down(sp - len, 16); } -static inline bool is_x32_task(void) +static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) @@ -318,7 +318,7 @@ static inline bool is_x32_task(void) static inline bool in_compat_syscall(void) { - return is_ia32_task() || is_x32_task(); + return in_ia32_syscall() || in_x32_syscall(); } #define in_compat_syscall in_compat_syscall /* override the generic impl */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ffae84df8a93..30c133ac05cd 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void) return true; } -static inline bool is_ia32_task(void) +static inline bool in_ia32_syscall(void) { #ifdef CONFIG_X86_32 return true; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 50337eac1ca2..24d1b7fb4399 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -210,7 +210,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, */ if (clone_flags & CLONE_SETTLS) { #ifdef CONFIG_IA32_EMULATION - if (is_ia32_task()) + if (in_ia32_syscall()) err = do_set_thread_area(p, -1, (struct user_desc __user *)tls, 0); else diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 32e9d9cbb884..0f4d2a5df2dc 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1266,7 +1266,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { #ifdef CONFIG_X86_X32_ABI - if (!is_ia32_task()) + if (!in_ia32_syscall()) return x32_arch_ptrace(child, request, caddr, cdata); #endif #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6408c09bbcd4..2ebcc60f0e14 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -762,7 +762,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { #ifdef CONFIG_X86_64 - if (is_ia32_task()) + if (in_ia32_syscall()) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index bf4db6eaec8f..98b4dc87628b 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -516,7 +516,7 @@ struct uprobe_xol_ops { static inline int sizeof_long(void) { - return is_ia32_task() ? 4 : 8; + return in_ia32_syscall() ? 4 : 8; } static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) -- cgit v1.2.3 From 35de5b0692aaa1f99803044526f2cc00ff864426 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:24 -0700 Subject: x86/asm: Stop depending on ptrace.h in alternative.h alternative.h pulls in ptrace.h, which means that alternatives can't be used in anything referenced from ptrace.h, which is a mess. Break the dependency by pulling text patching helpers into their own header. 
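The practical effect for users of the text-patching helpers, sketched (the call site below is made up; the include line is the point):

	#include <asm/text-patching.h>	/* no longer dragged in via <asm/alternative.h> */

	static void write_int3(void *addr)
	{
		static const unsigned char int3 = 0xcc;

		text_poke(addr, &int3, 1);	/* declaration now lives in text-patching.h */
	}
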
Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/99b93b13f2c9eb671f5c98bba4c2cbdc061293a2.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 33 ----------------------------- arch/x86/include/asm/kgdb.h | 2 ++ arch/x86/include/asm/setup.h | 1 + arch/x86/include/asm/text-patching.h | 40 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/alternative.c | 1 + arch/x86/kernel/jump_label.c | 1 + arch/x86/kernel/kgdb.c | 1 + arch/x86/kernel/kprobes/core.c | 1 + arch/x86/kernel/kprobes/opt.c | 1 + arch/x86/kernel/module.c | 1 + arch/x86/kernel/traps.c | 1 + 11 files changed, 50 insertions(+), 33 deletions(-) create mode 100644 arch/x86/include/asm/text-patching.h diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 99afb665a004..be4496c961db 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -5,7 +5,6 @@ #include #include #include -#include /* * Alternative inline assembly for SMP. @@ -233,36 +232,4 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr -struct paravirt_patch_site; -#ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end); -#else -static inline void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{} -#define __parainstructions NULL -#define __parainstructions_end NULL -#endif - -extern void *text_poke_early(void *addr, const void *opcode, size_t len); - -/* - * Clear and restore the kernel write-protection flag on the local CPU. - * Allows the kernel to edit read-only pages. - * Side-effect: any interrupt handler running between save and restore will have - * the ability to write to read-only pages. - * - * Warning: - * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and - * no thread can be preempted in the instructions being modified (no iret to an - * invalid instruction possible) or if the instructions are changed from a - * consistent state to another consistent state atomically. - * On the local CPU you need to be protected again NMI or MCE handlers seeing an - * inconsistent instruction while you patch. - */ -extern void *text_poke(void *addr, const void *opcode, size_t len); -extern int poke_int3_handler(struct pt_regs *regs); -extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); - #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h index 332f98c9111f..22a8537eb780 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h @@ -6,6 +6,8 @@ * Copyright (C) 2008 Wind River Systems, Inc. 
*/ +#include + /* * BUFMAX defines the maximum number of characters in inbound/outbound * buffers at least NUMREGBYTES*2 are needed for register packets diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 11af24e09c8a..ac1d5da14734 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -6,6 +6,7 @@ #define COMMAND_LINE_SIZE 2048 #include +#include #ifdef __i386__ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h new file mode 100644 index 000000000000..90395063383c --- /dev/null +++ b/arch/x86/include/asm/text-patching.h @@ -0,0 +1,40 @@ +#ifndef _ASM_X86_TEXT_PATCHING_H +#define _ASM_X86_TEXT_PATCHING_H + +#include +#include +#include + +struct paravirt_patch_site; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); +#else +static inline void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) +{} +#define __parainstructions NULL +#define __parainstructions_end NULL +#endif + +extern void *text_poke_early(void *addr, const void *opcode, size_t len); + +/* + * Clear and restore the kernel write-protection flag on the local CPU. + * Allows the kernel to edit read-only pages. + * Side-effect: any interrupt handler running between save and restore will have + * the ability to write to read-only pages. + * + * Warning: + * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and + * no thread can be preempted in the instructions being modified (no iret to an + * invalid instruction possible) or if the instructions are changed from a + * consistent state to another consistent state atomically. + * On the local CPU you need to be protected again NMI or MCE handlers seeing an + * inconsistent instruction while you patch. 
+ */ +extern void *text_poke(void *addr, const void *opcode, size_t len); +extern int poke_int3_handler(struct pt_regs *regs); +extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); + +#endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 25f909362b7a..5cb272a7a5a3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index e565e0e4d216..fc25f698d792 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef HAVE_JUMP_LABEL diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 2da6ee9ae69b..04cde527d728 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -45,6 +45,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index ae703acb85c1..38cf7a741250 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -51,6 +51,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 7b3b9d15c47a..4425f593f0ec 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 005c03e93fc5..477ae806c2fa 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 06cbe25861f1..d1590486204a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From f005f5d860e0231fe212cfda8c1a3148b99609f4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:25 -0700 Subject: x86/asm: Make asm/alternative.h safe from assembly asm/alternative.h isn't directly useful from assembly, but it shouldn't break the build. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/e5b693fcef99fe6e80341c9e97a002fb23871e91.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index be4496c961db..e77a6443104f 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_ALTERNATIVE_H #define _ASM_X86_ALTERNATIVE_H +#ifndef __ASSEMBLY__ + #include #include #include @@ -232,4 +234,6 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_ALTERNATIVE_H */ -- cgit v1.2.3 From 45e876f794e8e566bf827c25ef0791875081724f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:26 -0700 Subject: x86/segments/64: When loadsegment(fs, ...) fails, clear the base On AMD CPUs, a failed loadsegment currently may not clear the FS base. 
Fix it. While we're at it, prevent loadsegment(gs, xyz) from even compiling on 64-bit kernels. It shouldn't be used. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a084c1b93b7b1408b58d3fd0b5d6e47da8e7d7cf.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/segment.h | 42 +++++++++++++++++++++++++++++++++++++++--- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/mm/extable.c | 10 ++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 7d5a1929d76b..e1a4afd20223 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -2,6 +2,7 @@ #define _ASM_X86_SEGMENT_H #include +#include /* * Constructor for a conventional segment GDT (or LDT) entry. @@ -249,10 +250,13 @@ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDL #endif /* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. + * Load a segment. Fall back on loading the zero segment if something goes + * wrong. This variant assumes that loading zero fully clears the segment. + * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any + * failure to fully clear the cached descriptor is only observable for + * FS and GS. */ -#define loadsegment(seg, value) \ +#define __loadsegment_simple(seg, value) \ do { \ unsigned short __val = (value); \ \ @@ -269,6 +273,38 @@ do { \ : "+r" (__val) : : "memory"); \ } while (0) +#define __loadsegment_ss(value) __loadsegment_simple(ss, (value)) +#define __loadsegment_ds(value) __loadsegment_simple(ds, (value)) +#define __loadsegment_es(value) __loadsegment_simple(es, (value)) + +#ifdef CONFIG_X86_32 + +/* + * On 32-bit systems, the hidden parts of FS and GS are unobservable if + * the selector is NULL, so there's no funny business here. + */ +#define __loadsegment_fs(value) __loadsegment_simple(fs, (value)) +#define __loadsegment_gs(value) __loadsegment_simple(gs, (value)) + +#else + +static inline void __loadsegment_fs(unsigned short value) +{ + asm volatile(" \n" + "1: movw %0, %%fs \n" + "2: \n" + + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs) + + : : "rm" (value) : "memory"); +} + +/* __loadsegment_gs is intentionally undefined. Use load_gs_index instead. 
*/ + +#endif + +#define loadsegment(seg, value) __loadsegment_ ## seg (value) + /* * Save a segment register away: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bfa36de6d9f..088106140c4b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -430,7 +430,7 @@ void load_percpu_segment(int cpu) #ifdef CONFIG_X86_32 loadsegment(fs, __KERNEL_PERCPU); #else - loadsegment(gs, 0); + __loadsegment_simple(gs, 0); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif load_stack_canary_segment(); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index aaeda3ffaafe..4bb53b89f3c5 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -70,6 +70,16 @@ bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); +bool ex_handler_clear_fs(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + if (static_cpu_has(X86_BUG_NULL_SEG)) + asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); + asm volatile ("mov %0, %%fs" : : "rm" (0)); + return ex_handler_default(fixup, regs, trapnr); +} +EXPORT_SYMBOL(ex_handler_clear_fs); + bool ex_has_fault_handler(unsigned long ip) { const struct exception_table_entry *e; -- cgit v1.2.3 From b038c842b385f1470f991078e71b7c5b084a7341 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:27 -0700 Subject: x86/segments/64: When load_gs_index fails, clear the base On AMD CPUs, a failed load_gs_base currently may not clear the FS base. Fix it. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1a6c4d3a8a4e7be79ba448b42685e0321d50c14c.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1693c17dbf81..6344629ae1ce 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -794,6 +794,12 @@ END(native_load_gs_index) /* running with kernelgs */ bad_gs: SWAPGS /* switch back to user gs */ +.macro ZAP_GS + /* This can't be a string because the preprocessor needs to see it. */ + movl $__USER_DS, %eax + movl %eax, %gs +.endm + ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG xorl %eax, %eax movl %eax, %gs jmp 2b -- cgit v1.2.3 From 731e33e39a5b95ad77017811b3ced32ecf9dc666 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:28 -0700 Subject: x86/arch_prctl/64: Remove FSBASE/GSBASE < 4G optimization As far as I know, the optimization doesn't work on any modern distro because modern distros use high addresses for ASLR. Remove it. The ptrace code was either wrong or very strange, but the behavior with this patch should be essentially identical to the behavior without this patch unless user code goes out of its way to mislead ptrace. On newer CPUs, once the FSGSBASE instructions are enabled, we won't want to use the optimized variant anyway. This isn't actually much of a performance regression, it has no effect on normal dynamically linked programs, and it's a considerably simplification. It also removes some nasty special cases from code that is already way too full of special cases for comfort. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/dd1599b08866961dba9d2458faa6bbd7fba471d7.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/segment.h | 7 ----- arch/x86/kernel/process_64.c | 71 +++++++----------------------------------- arch/x86/kernel/ptrace.c | 44 ++++---------------------- 3 files changed, 17 insertions(+), 105 deletions(-) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index e1a4afd20223..1549caa098f0 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -208,13 +208,6 @@ #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) #define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3) -/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */ -#define FS_TLS 0 -#define GS_TLS 1 - -#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) -#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) - #endif #ifndef CONFIG_PARAVIRT diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 24d1b7fb4399..864fe2cde3c6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -136,25 +136,6 @@ void release_thread(struct task_struct *dead_task) } } -static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) -{ - struct user_desc ud = { - .base_addr = addr, - .limit = 0xfffff, - .seg_32bit = 1, - .limit_in_pages = 1, - .useable = 1, - }; - struct desc_struct *desc = t->thread.tls_array; - desc += tls; - fill_ldt(desc, &ud); -} - -static inline u32 read_32bit_tls(struct task_struct *t, int tls) -{ - return get_desc_base(&t->thread.tls_array[tls]); -} - int copy_thread_tls(unsigned long clone_flags, unsigned long sp, unsigned long arg, struct task_struct *p, unsigned long tls) { @@ -554,25 +535,12 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) if (addr >= TASK_SIZE_OF(task)) return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to - switch. */ - if (addr <= 0xffffffff) { - set_32bit_tls(task, GS_TLS, addr); - if (doit) { - load_TLS(&task->thread, cpu); - load_gs_index(GS_TLS_SEL); - } - task->thread.gsindex = GS_TLS_SEL; - task->thread.gs = 0; - } else { - task->thread.gsindex = 0; - task->thread.gs = addr; - if (doit) { - load_gs_index(0); - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); - } + task->thread.gsindex = 0; + task->thread.gs = addr; + if (doit) { + load_gs_index(0); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); } - put_cpu(); break; case ARCH_SET_FS: /* Not strictly needed for fs, but do it for symmetry @@ -580,25 +548,12 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) if (addr >= TASK_SIZE_OF(task)) return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to - switch. 
*/ - if (addr <= 0xffffffff) { - set_32bit_tls(task, FS_TLS, addr); - if (doit) { - load_TLS(&task->thread, cpu); - loadsegment(fs, FS_TLS_SEL); - } - task->thread.fsindex = FS_TLS_SEL; - task->thread.fs = 0; - } else { - task->thread.fsindex = 0; - task->thread.fs = addr; - if (doit) { - /* set the selector to 0 to not confuse - __switch_to */ - loadsegment(fs, 0); - ret = wrmsrl_safe(MSR_FS_BASE, addr); - } + task->thread.fsindex = 0; + task->thread.fs = addr; + if (doit) { + /* set the selector to 0 to not confuse __switch_to */ + loadsegment(fs, 0); + ret = wrmsrl_safe(MSR_FS_BASE, addr); } put_cpu(); break; @@ -606,8 +561,6 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) unsigned long base; if (doit) rdmsrl(MSR_FS_BASE, base); - else if (task->thread.fsindex == FS_TLS_SEL) - base = read_32bit_tls(task, FS_TLS); else base = task->thread.fs; ret = put_user(base, (unsigned long __user *)addr); @@ -617,8 +570,6 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) unsigned long base; if (doit) rdmsrl(MSR_KERNEL_GS_BASE, base); - else if (task->thread.gsindex == GS_TLS_SEL) - base = read_32bit_tls(task, GS_TLS); else base = task->thread.gs; ret = put_user(base, (unsigned long __user *)addr); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0f4d2a5df2dc..e72ab40fa969 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -303,29 +303,11 @@ static int set_segment_reg(struct task_struct *task, switch (offset) { case offsetof(struct user_regs_struct,fs): - /* - * If this is setting fs as for normal 64-bit use but - * setting fs_base has implicitly changed it, leave it. - */ - if ((value == FS_TLS_SEL && task->thread.fsindex == 0 && - task->thread.fs != 0) || - (value == 0 && task->thread.fsindex == FS_TLS_SEL && - task->thread.fs == 0)) - break; task->thread.fsindex = value; if (task == current) loadsegment(fs, task->thread.fsindex); break; case offsetof(struct user_regs_struct,gs): - /* - * If this is setting gs as for normal 64-bit use but - * setting gs_base has implicitly changed it, leave it. - */ - if ((value == GS_TLS_SEL && task->thread.gsindex == 0 && - task->thread.gs != 0) || - (value == 0 && task->thread.gsindex == GS_TLS_SEL && - task->thread.gs == 0)) - break; task->thread.gsindex = value; if (task == current) load_gs_index(task->thread.gsindex); @@ -453,31 +435,17 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset) #ifdef CONFIG_X86_64 case offsetof(struct user_regs_struct, fs_base): { /* - * do_arch_prctl may have used a GDT slot instead of - * the MSR. To userland, it appears the same either - * way, except the %fs segment selector might not be 0. + * XXX: This will not behave as expected if called on + * current or if fsindex != 0. */ - unsigned int seg = task->thread.fsindex; - if (task->thread.fs != 0) - return task->thread.fs; - if (task == current) - asm("movl %%fs,%0" : "=r" (seg)); - if (seg != FS_TLS_SEL) - return 0; - return get_desc_base(&task->thread.tls_array[FS_TLS]); + return task->thread.fs; } case offsetof(struct user_regs_struct, gs_base): { /* - * Exactly the same here as the %fs handling above. + * XXX: This will not behave as expected if called on + * current or if fsindex != 0. 
*/ - unsigned int seg = task->thread.gsindex; - if (task->thread.gs != 0) - return task->thread.gs; - if (task == current) - asm("movl %%gs,%0" : "=r" (seg)); - if (seg != GS_TLS_SEL) - return 0; - return get_desc_base(&task->thread.tls_array[GS_TLS]); + return task->thread.gs; } #endif } -- cgit v1.2.3 From 296f781a4b7801ad9c1c0219f9e87b6c25e196fe Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:29 -0700 Subject: x86/asm/64: Rename thread_struct's fs and gs to fsbase and gsbase Unlike ds and es, these are base addresses, not selectors. Rename them so their meaning is more obvious. On x86_32, the field is still called fs. Fixing that could make sense as a future cleanup. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/69a18a51c4cba0ce29a241e570fc618ad721d908.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/elf.h | 6 +++--- arch/x86/include/asm/processor.h | 11 +++++++++-- arch/x86/kernel/process_64.c | 30 +++++++++++++++--------------- arch/x86/kernel/ptrace.c | 8 ++++---- arch/x86/kvm/svm.c | 2 +- 5 files changed, 32 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 15340e36ddcb..fea7724141a0 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -176,7 +176,7 @@ static inline void elf_common_init(struct thread_struct *t, regs->si = regs->di = regs->bp = 0; regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; - t->fs = t->gs = 0; + t->fsbase = t->gsbase = 0; t->fsindex = t->gsindex = 0; t->ds = t->es = ds; } @@ -226,8 +226,8 @@ do { \ (pr_reg)[18] = (regs)->flags; \ (pr_reg)[19] = (regs)->sp; \ (pr_reg)[20] = (regs)->ss; \ - (pr_reg)[21] = current->thread.fs; \ - (pr_reg)[22] = current->thread.gs; \ + (pr_reg)[21] = current->thread.fsbase; \ + (pr_reg)[22] = current->thread.gsbase; \ asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9264476f3d57..9251aa962721 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -388,9 +388,16 @@ struct thread_struct { unsigned long ip; #endif #ifdef CONFIG_X86_64 - unsigned long fs; + unsigned long fsbase; + unsigned long gsbase; +#else + /* + * XXX: this could presumably be unsigned short. Alternatively, + * 32-bit kernels could be taught to use fsindex instead. + */ + unsigned long fs; + unsigned long gs; #endif - unsigned long gs; /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 864fe2cde3c6..4285f6adcc5e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -150,9 +150,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); - p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs; + p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase; savesegment(fs, p->thread.fsindex); - p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; + p->thread.fsbase = p->thread.fsindex ? 
0 : me->thread.fsbase; savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -329,18 +329,18 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * stronger guarantees.) * * As an invariant, - * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) is + * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is * impossible. */ if (next->fsindex) { /* Loading a nonzero value into FS sets the index and base. */ loadsegment(fs, next->fsindex); } else { - if (next->fs) { + if (next->fsbase) { /* Next index is zero but next base is nonzero. */ if (prev_fsindex) loadsegment(fs, 0); - wrmsrl(MSR_FS_BASE, next->fs); + wrmsrl(MSR_FS_BASE, next->fsbase); } else { /* Next base and index are both zero. */ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { @@ -356,7 +356,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * didn't change the base, then the base is * also zero and we don't need to do anything. */ - if (prev->fs || prev_fsindex) + if (prev->fsbase || prev_fsindex) loadsegment(fs, 0); } } @@ -369,18 +369,18 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * us. */ if (prev_fsindex) - prev->fs = 0; + prev->fsbase = 0; prev->fsindex = prev_fsindex; if (next->gsindex) { /* Loading a nonzero value into GS sets the index and base. */ load_gs_index(next->gsindex); } else { - if (next->gs) { + if (next->gsbase) { /* Next index is zero but next base is nonzero. */ if (prev_gsindex) load_gs_index(0); - wrmsrl(MSR_KERNEL_GS_BASE, next->gs); + wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase); } else { /* Next base and index are both zero. */ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) { @@ -400,7 +400,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * didn't change the base, then the base is * also zero and we don't need to do anything. */ - if (prev->gs || prev_gsindex) + if (prev->gsbase || prev_gsindex) load_gs_index(0); } } @@ -413,7 +413,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * us. 
*/ if (prev_gsindex) - prev->gs = 0; + prev->gsbase = 0; prev->gsindex = prev_gsindex; switch_fpu_finish(next_fpu, fpu_switch); @@ -536,7 +536,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) return -EPERM; cpu = get_cpu(); task->thread.gsindex = 0; - task->thread.gs = addr; + task->thread.gsbase = addr; if (doit) { load_gs_index(0); ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); @@ -549,7 +549,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) return -EPERM; cpu = get_cpu(); task->thread.fsindex = 0; - task->thread.fs = addr; + task->thread.fsbase = addr; if (doit) { /* set the selector to 0 to not confuse __switch_to */ loadsegment(fs, 0); @@ -562,7 +562,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) if (doit) rdmsrl(MSR_FS_BASE, base); else - base = task->thread.fs; + base = task->thread.fsbase; ret = put_user(base, (unsigned long __user *)addr); break; } @@ -571,7 +571,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) if (doit) rdmsrl(MSR_KERNEL_GS_BASE, base); else - base = task->thread.gs; + base = task->thread.gsbase; ret = put_user(base, (unsigned long __user *)addr); break; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e72ab40fa969..e60ef918f53d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -399,7 +399,7 @@ static int putreg(struct task_struct *child, * to set either thread.fs or thread.fsindex and the * corresponding GDT slot. */ - if (child->thread.fs != value) + if (child->thread.fsbase != value) return do_arch_prctl(child, ARCH_SET_FS, value); return 0; case offsetof(struct user_regs_struct,gs_base): @@ -408,7 +408,7 @@ static int putreg(struct task_struct *child, */ if (value >= TASK_SIZE_OF(child)) return -EIO; - if (child->thread.gs != value) + if (child->thread.gsbase != value) return do_arch_prctl(child, ARCH_SET_GS, value); return 0; #endif @@ -438,14 +438,14 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset) * XXX: This will not behave as expected if called on * current or if fsindex != 0. */ - return task->thread.fs; + return task->thread.fsbase; } case offsetof(struct user_regs_struct, gs_base): { /* * XXX: This will not behave as expected if called on * current or if fsindex != 0. */ - return task->thread.gs; + return task->thread.gsbase; } #endif } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 31346a3f20a5..fafd720ce10a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1254,7 +1254,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) kvm_load_ldt(svm->host.ldt); #ifdef CONFIG_X86_64 loadsegment(fs, svm->host.fs); - wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); + wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); load_gs_index(svm->host.gs); #else #ifdef CONFIG_X86_32_LAZY_GS -- cgit v1.2.3 From c9867f863e19dd07c6085b457f6775047924c3b5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:30 -0700 Subject: x86/tls: Synchronize segment registers in set_thread_area() The current behavior of set_thread_area() when it modifies a segment that is currently loaded is a bit confused. If CS [1] or SS is modified, the change will take effect on return to userspace because CS and SS are fundamentally always reloaded on return to userspace. 
Similarly, on 32-bit kernels, if DS, ES, FS, or (depending on configuration) GS refers to a modified segment, the change will take effect immediately on return to user mode because the entry code reloads these registers. If set_thread_area() modifies DS, ES [2], FS, or GS on 64-bit kernels or GS on 32-bit lazy-GS [3] kernels, however, the segment registers will be left alone until something (most likely a context switch) causes them to be reloaded. This means that behavior visible to user space is inconsistent. If set_thread_area() is implicitly called via CLONE_SETTLS, then all segment registers will be reloaded before the thread starts because CLONE_SETTLS happens before the initial context switch into the newly created thread. Empirically, glibc requires the immediate reload on CLONE_SETTLS -- 32-bit glibc on my system does *not* manually reload GS when creating a new thread. Before enabling FSGSBASE, we need to figure out what the behavior will be, as FSGSBASE requires that we reconsider our behavior when, e.g., GS and GSBASE are out of sync in user mode. Given that we must preserve the existing behavior of CLONE_SETTLS, it makes sense to me that we simply extend similar behavior to all invocations of set_thread_area(). This patch explicitly updates any segment register referring to a segment that is targetted by set_thread_area(). If set_thread_area() deletes the segment, then the segment register will be nulled out. [1] This can't actually happen since 0e58af4e1d21 ("x86/tls: Disallow unusual TLS segments") but, if it did, this is how it would behave. [2] I strongly doubt that any existing non-malicious program loads a TLS segment into DS or ES on a 64-bit kernel because the context switch code was badly broken until recently, but that's not an excuse to leave the current code alone. [3] One way or another, that config option should to go away. Yuck! Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/27d119b0d396e9b82009e40dff8333a249038225.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/tls.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 7fc5e843f247..9692a5e9fdab 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -114,6 +114,7 @@ int do_set_thread_area(struct task_struct *p, int idx, int can_allocate) { struct user_desc info; + unsigned short __maybe_unused sel, modified_sel; if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; @@ -141,6 +142,47 @@ int do_set_thread_area(struct task_struct *p, int idx, set_tls_desc(p, idx, &info, 1); + /* + * If DS, ES, FS, or GS points to the modified segment, forcibly + * refresh it. Only needed on x86_64 because x86_32 reloads them + * on return to user mode. 
+ */ + modified_sel = (idx << 3) | 3; + + if (p == current) { +#ifdef CONFIG_X86_64 + savesegment(ds, sel); + if (sel == modified_sel) + loadsegment(ds, sel); + + savesegment(es, sel); + if (sel == modified_sel) + loadsegment(es, sel); + + savesegment(fs, sel); + if (sel == modified_sel) + loadsegment(fs, sel); + + savesegment(gs, sel); + if (sel == modified_sel) + load_gs_index(sel); +#endif + +#ifdef CONFIG_X86_32_LAZY_GS + savesegment(gs, sel); + if (sel == modified_sel) + loadsegment(gs, sel); +#endif + } else { +#ifdef CONFIG_X86_64 + if (p->thread.fsindex == modified_sel) + p->thread.fsbase = info.base_addr; + + if (p->thread.gsindex == modified_sel) + p->thread.gsbase = info.base_addr; +#endif + } + return 0; } -- cgit v1.2.3 From d63f4b5269e93759d3036932890c96d7a8639e5b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 12:23:31 -0700 Subject: selftests/x86/ldt_gdt: Test set_thread_area() deletion of an active segment Now that set_thread_area() is supposed to give deterministic behavior when it modifies in-use segments, test it. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/f2bc11af1ee1a0f815ed910840cbdba06b640a20.1461698311.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/ldt_gdt.c | 250 ++++++++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 31a3035cd4eb..4af47079cf04 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -21,6 +21,9 @@ #include #include #include +#include +#include +#include #define AR_ACCESSED (1<<8) @@ -44,6 +47,12 @@ static int nerrs; +/* Points to an array of 1024 ints, each holding its own index. 
*/ +static const unsigned int *counter_page; +static struct user_desc *low_user_desc; +static struct user_desc *low_user_desc_clear; /* Use to delete GDT entry */ +static int gdt_entry_num; + static void check_invalid_segment(uint16_t index, int ldt) { uint32_t has_limit = 0, has_ar = 0, limit, ar; @@ -561,16 +570,257 @@ static void do_exec_test(void) } } +static void setup_counter_page(void) +{ + unsigned int *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0); + if (page == MAP_FAILED) + err(1, "mmap"); + + for (int i = 0; i < 1024; i++) + page[i] = i; + counter_page = page; +} + +static int invoke_set_thread_area(void) +{ + int ret; + asm volatile ("int $0x80" + : "=a" (ret), "+m" (low_user_desc) : + "a" (243), "b" (low_user_desc) + : "flags"); + return ret; +} + +static void setup_low_user_desc(void) +{ + low_user_desc = mmap(NULL, 2 * sizeof(struct user_desc), + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0); + if (low_user_desc == MAP_FAILED) + err(1, "mmap"); + + low_user_desc->entry_number = -1; + low_user_desc->base_addr = (unsigned long)&counter_page[1]; + low_user_desc->limit = 0xfffff; + low_user_desc->seg_32bit = 1; + low_user_desc->contents = 0; /* Data, grow-up*/ + low_user_desc->read_exec_only = 0; + low_user_desc->limit_in_pages = 1; + low_user_desc->seg_not_present = 0; + low_user_desc->useable = 0; + + if (invoke_set_thread_area() == 0) { + gdt_entry_num = low_user_desc->entry_number; + printf("[NOTE]\tset_thread_area is available; will use GDT index %d\n", gdt_entry_num); + } else { + printf("[NOTE]\tset_thread_area is unavailable\n"); + } + + low_user_desc_clear = low_user_desc + 1; + low_user_desc_clear->entry_number = gdt_entry_num; + low_user_desc_clear->read_exec_only = 1; + low_user_desc_clear->seg_not_present = 1; +} + +static void test_gdt_invalidation(void) +{ + if (!gdt_entry_num) + return; /* 64-bit only system -- we can't use set_thread_area */ + + unsigned short prev_sel; + unsigned short sel; + unsigned int eax; + const char *result; +#ifdef __x86_64__ + unsigned long saved_base; + unsigned long new_base; +#endif + + /* Test DS */ + invoke_set_thread_area(); + eax = 243; + sel = (gdt_entry_num << 3) | 3; + asm volatile ("movw %%ds, %[prev_sel]\n\t" + "movw %[sel], %%ds\n\t" +#ifdef __i386__ + "pushl %%ebx\n\t" +#endif + "movl %[arg1], %%ebx\n\t" + "int $0x80\n\t" /* Should invalidate ds */ +#ifdef __i386__ + "popl %%ebx\n\t" +#endif + "movw %%ds, %[sel]\n\t" + "movw %[prev_sel], %%ds" + : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel), + "+a" (eax) + : "m" (low_user_desc_clear), + [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) + : "flags"); + + if (sel != 0) { + result = "FAIL"; + nerrs++; + } else { + result = "OK"; + } + printf("[%s]\tInvalidate DS with set_thread_area: new DS = 0x%hx\n", + result, sel); + + /* Test ES */ + invoke_set_thread_area(); + eax = 243; + sel = (gdt_entry_num << 3) | 3; + asm volatile ("movw %%es, %[prev_sel]\n\t" + "movw %[sel], %%es\n\t" +#ifdef __i386__ + "pushl %%ebx\n\t" +#endif + "movl %[arg1], %%ebx\n\t" + "int $0x80\n\t" /* Should invalidate es */ +#ifdef __i386__ + "popl %%ebx\n\t" +#endif + "movw %%es, %[sel]\n\t" + "movw %[prev_sel], %%es" + : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel), + "+a" (eax) + : "m" (low_user_desc_clear), + [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) + : "flags"); + + if (sel != 0) { + result = "FAIL"; + nerrs++; + } else { + result = "OK"; + } + printf("[%s]\tInvalidate ES with 
set_thread_area: new ES = 0x%hx\n", + result, sel); + + /* Test FS */ + invoke_set_thread_area(); + eax = 243; + sel = (gdt_entry_num << 3) | 3; +#ifdef __x86_64__ + syscall(SYS_arch_prctl, ARCH_GET_FS, &saved_base); +#endif + asm volatile ("movw %%fs, %[prev_sel]\n\t" + "movw %[sel], %%fs\n\t" +#ifdef __i386__ + "pushl %%ebx\n\t" +#endif + "movl %[arg1], %%ebx\n\t" + "int $0x80\n\t" /* Should invalidate fs */ +#ifdef __i386__ + "popl %%ebx\n\t" +#endif + "movw %%fs, %[sel]\n\t" + : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel), + "+a" (eax) + : "m" (low_user_desc_clear), + [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) + : "flags"); + +#ifdef __x86_64__ + syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); +#endif + + /* Restore FS/BASE for glibc */ + asm volatile ("movw %[prev_sel], %%fs" : : [prev_sel] "rm" (prev_sel)); +#ifdef __x86_64__ + if (saved_base) + syscall(SYS_arch_prctl, ARCH_SET_FS, saved_base); +#endif + + if (sel != 0) { + result = "FAIL"; + nerrs++; + } else { + result = "OK"; + } + printf("[%s]\tInvalidate FS with set_thread_area: new FS = 0x%hx\n", + result, sel); + +#ifdef __x86_64__ + if (sel == 0 && new_base != 0) { + nerrs++; + printf("[FAIL]\tNew FSBASE was 0x%lx\n", new_base); + } else { + printf("[OK]\tNew FSBASE was zero\n"); + } +#endif + + /* Test GS */ + invoke_set_thread_area(); + eax = 243; + sel = (gdt_entry_num << 3) | 3; +#ifdef __x86_64__ + syscall(SYS_arch_prctl, ARCH_GET_GS, &saved_base); +#endif + asm volatile ("movw %%gs, %[prev_sel]\n\t" + "movw %[sel], %%gs\n\t" +#ifdef __i386__ + "pushl %%ebx\n\t" +#endif + "movl %[arg1], %%ebx\n\t" + "int $0x80\n\t" /* Should invalidate gs */ +#ifdef __i386__ + "popl %%ebx\n\t" +#endif + "movw %%gs, %[sel]\n\t" + : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel), + "+a" (eax) + : "m" (low_user_desc_clear), + [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) + : "flags"); + +#ifdef __x86_64__ + syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); +#endif + + /* Restore GS/BASE for glibc */ + asm volatile ("movw %[prev_sel], %%gs" : : [prev_sel] "rm" (prev_sel)); +#ifdef __x86_64__ + if (saved_base) + syscall(SYS_arch_prctl, ARCH_SET_GS, saved_base); +#endif + + if (sel != 0) { + result = "FAIL"; + nerrs++; + } else { + result = "OK"; + } + printf("[%s]\tInvalidate GS with set_thread_area: new GS = 0x%hx\n", + result, sel); + +#ifdef __x86_64__ + if (sel == 0 && new_base != 0) { + nerrs++; + printf("[FAIL]\tNew GSBASE was 0x%lx\n", new_base); + } else { + printf("[OK]\tNew GSBASE was zero\n"); + } +#endif +} + int main(int argc, char **argv) { if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec")) return finish_exec_test(); + setup_counter_page(); + setup_low_user_desc(); + do_simple_tests(); do_multicpu_tests(); do_exec_test(); + test_gdt_invalidation(); + return nerrs ? 1 : 0; } -- cgit v1.2.3 From 778843f934e362ed4ed734520f60a44a78a074b4 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 2 May 2016 16:56:50 +0200 Subject: x86/asm/entry/32: Simplify pushes of zeroed pt_regs->REGs Use of a temporary R8 register here seems to be unnecessary. "push %r8" is a two-byte insn (it needs REX prefix to specify R8), "push $0" is two-byte too. It seems just using the latter would be no worse. Thus, code had an unnecessary "xorq %r8,%r8" insn. It probably costs nothing in execution time here since we are probably limited by store bandwidth at this point, but still. 
Run-tested under QEMU: 32-bit calls still work: / # ./test_syscall_vdso32 [RUN] Executing 6-argument 32-bit syscall via VDSO [OK] Arguments are preserved across syscall [NOTE] R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn [OK] R8..R15 did not leak kernel data [RUN] Executing 6-argument 32-bit syscall via INT 80 [OK] Arguments are preserved across syscall [OK] R8..R15 did not leak kernel data [RUN] Running tests under ptrace [RUN] Executing 6-argument 32-bit syscall via VDSO [OK] Arguments are preserved across syscall [NOTE] R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn [OK] R8..R15 did not leak kernel data [RUN] Executing 6-argument 32-bit syscall via INT 80 [OK] Arguments are preserved across syscall [OK] R8..R15 did not leak kernel data Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1462201010-16846-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 45 +++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 847f2f0c31e5..e1721dafbcb1 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -72,24 +72,23 @@ ENTRY(entry_SYSENTER_compat) pushfq /* pt_regs->flags (except IF = 0) */ orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ pushq $__USER32_CS /* pt_regs->cs */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->ip = 0 (placeholder) */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ - pushq %r8 /* pt_regs->r12 = 0 */ - pushq %r8 /* pt_regs->r13 = 0 */ - pushq %r8 /* pt_regs->r14 = 0 */ - pushq %r8 /* pt_regs->r15 = 0 */ + pushq $0 /* pt_regs->r12 = 0 */ + pushq $0 /* pt_regs->r13 = 0 */ + pushq $0 /* pt_regs->r14 = 0 */ + pushq $0 /* pt_regs->r15 = 0 */ cld /* @@ -205,17 +204,16 @@ ENTRY(entry_SYSCALL_compat) pushq %rdx /* pt_regs->dx */ pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ - pushq %r8 /* pt_regs->r12 = 0 */ - pushq %r8 /* pt_regs->r13 = 0 */ - pushq %r8 /* pt_regs->r14 = 0 */ - pushq %r8 /* pt_regs->r15 = 0 */ + pushq $0 /* pt_regs->r12 = 0 */ + pushq $0 /* pt_regs->r13 = 0 */ + pushq $0 /* pt_regs->r14 = 0 */ + pushq $0 /* pt_regs->r15 = 0 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -316,11 +314,10 @@ ENTRY(entry_INT80_compat) 
pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - xorq %r8,%r8 - pushq %r8 /* pt_regs->r8 = 0 */ - pushq %r8 /* pt_regs->r9 = 0 */ - pushq %r8 /* pt_regs->r10 = 0 */ - pushq %r8 /* pt_regs->r11 = 0 */ + pushq $0 /* pt_regs->r8 = 0 */ + pushq $0 /* pt_regs->r9 = 0 */ + pushq $0 /* pt_regs->r10 = 0 */ + pushq $0 /* pt_regs->r11 = 0 */ pushq %rbx /* pt_regs->rbx */ pushq %rbp /* pt_regs->rbp */ pushq %r12 /* pt_regs->r12 */ -- cgit v1.2.3 From 092c74e420952c7cb68141731f2b562245b51eeb Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 4 May 2016 22:44:36 -0400 Subject: x86/entry, sched/x86: Don't save/restore EFLAGS on task switch Now that NT is filtered by the SYSENTER entry code, it is safe to skip saving and restoring flags on task switch. Also remove a leftover reset of flags on 64-bit fork. Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1462416278-11974-2-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 4 ---- arch/x86/entry/entry_64.S | 3 --- arch/x86/include/asm/switch_to.h | 4 +--- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 10868aa734dc..c84d99bfff94 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -209,8 +209,6 @@ ENTRY(ret_from_fork) call schedule_tail GET_THREAD_INFO(%ebp) popl %eax - pushl $0x0202 # Reset kernel eflags - popfl /* When we fork, we trace the syscall return in the child, too. */ movl %esp, %eax @@ -223,8 +221,6 @@ ENTRY(ret_from_kernel_thread) call schedule_tail GET_THREAD_INFO(%ebp) popl %eax - pushl $0x0202 # Reset kernel eflags - popfl movl PT_EBP(%esp), %eax call *PT_EBX(%esp) movl $0, PT_EAX(%esp) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 6344629ae1ce..9ee0da1807ed 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -372,9 +372,6 @@ END(ptregs_\func) ENTRY(ret_from_fork) LOCK ; btr $TIF_FORK, TI_flags(%r8) - pushq $0x0002 - popfq /* reset kernel eflags */ - call schedule_tail /* rdi: 'prev' task parameter */ testb $3, CS(%rsp) /* from kernel_thread? */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 751bf4b7bf11..8f321a1b03a1 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -39,8 +39,7 @@ do { \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ \ - asm volatile("pushfl\n\t" /* save flags */ \ - "pushl %%ebp\n\t" /* save EBP */ \ + asm volatile("pushl %%ebp\n\t" /* save EBP */ \ "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ @@ -49,7 +48,6 @@ do { \ "jmp __switch_to\n" /* regparm call */ \ "1:\t" \ "popl %%ebp\n\t" /* restore EBP */ \ - "popfl\n" /* restore flags */ \ \ /* output parameters */ \ : [prev_sp] "=m" (prev->thread.sp), \ -- cgit v1.2.3 From 1e17880371f85d3d866962e04ba3567c0654a125 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 4 May 2016 22:44:37 -0400 Subject: x86/entry/32: Remove GET_THREAD_INFO() from entry code The entry code used to cache the thread_info pointer in the EBP register, but all the code that used it has been moved to C. Remove the unused code to get the pointer. 
Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1462416278-11974-3-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index c84d99bfff94..983e5d3a0d27 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -207,7 +207,6 @@ ENTRY(ret_from_fork) pushl %eax call schedule_tail - GET_THREAD_INFO(%ebp) popl %eax /* When we fork, we trace the syscall return in the child, too. */ @@ -219,7 +218,6 @@ END(ret_from_fork) ENTRY(ret_from_kernel_thread) pushl %eax call schedule_tail - GET_THREAD_INFO(%ebp) popl %eax movl PT_EBP(%esp), %eax call *PT_EBX(%esp) @@ -247,7 +245,6 @@ ENDPROC(ret_from_kernel_thread) ret_from_exception: preempt_stop(CLBR_ANY) ret_from_intr: - GET_THREAD_INFO(%ebp) #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS movb PT_CS(%esp), %al -- cgit v1.2.3 From 0676b4e0a1940a6b7ae3156bd212ca9032a29c30 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 4 May 2016 22:44:38 -0400 Subject: x86/entry/32: Remove asmlinkage_protect() Now that syscalls are called from C code, which copies the args to new stack slots instead of overlaying pt_regs, asmlinkage_protect() is no longer needed. Signed-off-by: Brian Gerst Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1462416278-11974-4-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/linkage.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 79327e9483a3..0ccb26dda126 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -8,40 +8,6 @@ #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) - -/* - * Make sure the compiler doesn't do anything stupid with the - * arguments on the stack - they are owned by the *caller*, not - * the callee. This just fools gcc into not spilling into them, - * and keeps it from doing tailcall recursion and/or using the - * stack slots for temporaries, since they are live and "used" - * all the way to the end of the function. - * - * NOTE! On x86-64, all the arguments are in registers, so this - * only matters on a 32-bit kernel. - */ -#define asmlinkage_protect(n, ret, args...) \ - __asmlinkage_protect##n(ret, ##args) -#define __asmlinkage_protect_n(ret, args...) 
\ - __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) -#define __asmlinkage_protect0(ret) \ - __asmlinkage_protect_n(ret) -#define __asmlinkage_protect1(ret, arg1) \ - __asmlinkage_protect_n(ret, "m" (arg1)) -#define __asmlinkage_protect2(ret, arg1, arg2) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) -#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) -#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ - "m" (arg4)) -#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ - "m" (arg4), "m" (arg5)) -#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ - __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ - "m" (arg4), "m" (arg5), "m" (arg6)) - #endif /* CONFIG_X86_32 */ #ifdef __ASSEMBLY__ -- cgit v1.2.3 From 4afd0565552c87f23834db9121dd9cf6955d0b43 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 10 May 2016 22:56:43 +0200 Subject: x86/arch_prctl/64: Restore accidentally removed put_cpu() in ARCH_SET_GS This fixes an oversight in: 731e33e39a5b95 ("Remove FSBASE/GSBASE < 4G optimization") Signed-off-by: Mateusz Guzik Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1462913803-29634-1-git-send-email-mguzik@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4285f6adcc5e..6b16c36f0939 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -541,6 +541,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) load_gs_index(0); ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); } + put_cpu(); break; case ARCH_SET_FS: /* Not strictly needed for fs, but do it for symmetry -- cgit v1.2.3
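
Editor's note on the final fix above: below is a minimal sketch, assuming kernel context, of what the ARCH_SET_GS path of do_arch_prctl() looks like once the put_cpu() is restored. It is not the verbatim upstream function (the helper name and reduced argument list are hypothetical); the calls it uses -- get_cpu(), put_cpu(), load_gs_index(), wrmsrl_safe() -- are the ones visible in the hunks earlier in this series. The point of the one-line patch is that get_cpu() disables preemption, so every path that takes it must also reach put_cpu(); the 731e33e39a5b95 cleanup deleted the else-branch that happened to contain that call.

	/*
	 * Simplified sketch of the ARCH_SET_GS path after the fix.
	 * Kernel context assumed (task_struct, MSR and segment helpers from
	 * the usual arch/x86 headers); not the verbatim upstream code.
	 */
	static long sketch_arch_set_gs(struct task_struct *task,
				       unsigned long addr, int doit)
	{
		long ret = 0;

		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;		/* preemption not disabled yet */

		get_cpu();			/* disables preemption */
		task->thread.gsindex = 0;	/* selector 0: the base lives in the MSR */
		task->thread.gsbase = addr;
		if (doit) {
			load_gs_index(0);
			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
		}
		put_cpu();			/* the call this patch restores */

		return ret;
	}

Without that final put_cpu(), an ARCH_SET_GS call would return to user space with the preempt count still elevated, which is exactly the imbalance the patch closes.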