Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/Kconfig | 12
-rw-r--r-- arch/x86/Makefile | 7
-rw-r--r-- arch/x86/entry/calling.h | 34
-rw-r--r-- arch/x86/entry/entry_32.S | 3
-rw-r--r-- arch/x86/entry/entry_64.S | 155
-rw-r--r-- arch/x86/entry/entry_64_compat.S | 83
-rw-r--r-- arch/x86/entry/syscalls/syscall_32.tbl | 38
-rw-r--r-- arch/x86/entry/vsyscall/vsyscall_64.c | 16
-rw-r--r-- arch/x86/events/intel/uncore_snbep.c | 2
-rw-r--r-- arch/x86/ia32/sys_ia32.c | 74
-rw-r--r-- arch/x86/include/asm/apm.h | 6
-rw-r--r-- arch/x86/include/asm/asm-prototypes.h | 3
-rw-r--r-- arch/x86/include/asm/bitops.h | 29
-rw-r--r-- arch/x86/include/asm/cpufeatures.h | 3
-rw-r--r-- arch/x86/include/asm/efi.h | 17
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 6
-rw-r--r-- arch/x86/include/asm/microcode.h | 10
-rw-r--r-- arch/x86/include/asm/mmu_context.h | 1
-rw-r--r-- arch/x86/include/asm/nospec-branch.h | 143
-rw-r--r-- arch/x86/include/asm/paravirt.h | 17
-rw-r--r-- arch/x86/include/asm/paravirt_types.h | 5
-rw-r--r-- arch/x86/include/asm/percpu.h | 2
-rw-r--r-- arch/x86/include/asm/pgtable.h | 8
-rw-r--r-- arch/x86/include/asm/pgtable_32.h | 1
-rw-r--r-- arch/x86/include/asm/pgtable_64.h | 1
-rw-r--r-- arch/x86/include/asm/pgtable_types.h | 12
-rw-r--r-- arch/x86/include/asm/processor.h | 1
-rw-r--r-- arch/x86/include/asm/refcount.h | 6
-rw-r--r-- arch/x86/include/asm/rmwcc.h | 16
-rw-r--r-- arch/x86/include/asm/sections.h | 1
-rw-r--r-- arch/x86/include/asm/sys_ia32.h | 48
-rw-r--r-- arch/x86/include/uapi/asm/kvm_para.h | 1
-rw-r--r-- arch/x86/include/uapi/asm/mce.h | 1
-rw-r--r-- arch/x86/kernel/apic/io_apic.c | 2
-rw-r--r-- arch/x86/kernel/cpu/bugs.c | 12
-rw-r--r-- arch/x86/kernel/cpu/common.c | 30
-rw-r--r-- arch/x86/kernel/cpu/intel.c | 10
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 26
-rw-r--r-- arch/x86/kernel/cpu/microcode/amd.c | 44
-rw-r--r-- arch/x86/kernel/cpu/microcode/core.c | 181
-rw-r--r-- arch/x86/kernel/cpu/microcode/intel.c | 62
-rw-r--r-- arch/x86/kernel/head_64.S | 2
-rw-r--r-- arch/x86/kernel/ioport.c | 2
-rw-r--r-- arch/x86/kernel/kprobes/core.c | 10
-rw-r--r-- arch/x86/kernel/kvm.c | 20
-rw-r--r-- arch/x86/kernel/setup.c | 17
-rw-r--r-- arch/x86/kernel/setup_percpu.c | 17
-rw-r--r-- arch/x86/kernel/signal_compat.c | 65
-rw-r--r-- arch/x86/kernel/unwind_orc.c | 3
-rw-r--r-- arch/x86/kernel/vm86_32.c | 3
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S | 2
-rw-r--r-- arch/x86/kvm/cpuid.c | 3
-rw-r--r-- arch/x86/kvm/lapic.c | 11
-rw-r--r-- arch/x86/kvm/mmu.c | 6
-rw-r--r-- arch/x86/kvm/svm.c | 90
-rw-r--r-- arch/x86/kvm/vmx.c | 26
-rw-r--r-- arch/x86/kvm/x86.c | 107
-rw-r--r-- arch/x86/lib/Makefile | 1
-rw-r--r-- arch/x86/lib/retpoline.S | 56
-rw-r--r-- arch/x86/mm/cpu_entry_area.c | 6
-rw-r--r-- arch/x86/mm/fault.c | 10
-rw-r--r-- arch/x86/mm/init_32.c | 15
-rw-r--r-- arch/x86/mm/mem_encrypt_boot.S | 2
-rw-r--r-- arch/x86/mm/pti.c | 2
-rw-r--r-- arch/x86/platform/intel-mid/intel-mid.c | 2
-rw-r--r-- arch/x86/realmode/rm/trampoline_64.S | 2
-rw-r--r-- arch/x86/xen/enlighten_pv.c | 6
-rw-r--r-- arch/x86/xen/suspend.c | 16
68 files changed, 1113 insertions, 518 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1236b187824..0fa71a78ec99 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -430,6 +430,7 @@ config GOLDFISH
config RETPOLINE
bool "Avoid speculative indirect branches in kernel"
default y
+ select STACK_VALIDATION if HAVE_STACK_VALIDATION
help
Compile kernel with the retpoline compiler options to guard against
kernel-to-user data leaks by avoiding speculative indirect
@@ -2306,7 +2307,7 @@ choice
it can be used to assist security vulnerability exploitation.
This setting can be changed at boot time via the kernel command
- line parameter vsyscall=[native|emulate|none].
+ line parameter vsyscall=[emulate|none].
On a system with recent enough glibc (2.14 or newer) and no
static binaries, you can say None without a performance penalty
@@ -2314,15 +2315,6 @@ choice
If unsure, select "Emulate".
- config LEGACY_VSYSCALL_NATIVE
- bool "Native"
- help
- Actual executable code is located in the fixed vsyscall
- address mapping, implementing time() efficiently. Since
- this makes the mapping executable, it can be used during
- security vulnerability exploitation (traditionally as
- ROP gadgets). This configuration is not recommended.
-
config LEGACY_VSYSCALL_EMULATE
bool "Emulate"
help
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fad55160dcb9..498c1b812300 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
- RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
- ifneq ($(RETPOLINE_CFLAGS),)
- KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
- endif
+ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+endif
endif
archscripts: scripts_basic
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index dce7092ab24a..be63330c5511 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
/*
* Push registers and sanitize registers of values that a
* speculation attack might otherwise want to exploit. The
@@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with
* could be put to use in a speculative execution gadget.
* Interleave XOR with PUSH for better uop scheduling:
*/
+ .if \save_ret
+ pushq %rsi /* pt_regs->si */
+ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
+ movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */
+ .else
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
+ .endif
pushq \rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
- xorq %r11, %r11 /* nospec r11*/
+ xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
- xorq %r12, %r12 /* nospec r12*/
+ xorl %r12d, %r12d /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
- xorq %r13, %r13 /* nospec r13*/
+ xorl %r13d, %r13d /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
- xorq %r14, %r14 /* nospec r14*/
+ xorl %r14d, %r14d /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
- xorq %r15, %r15 /* nospec r15*/
+ xorl %r15d, %r15d /* nospec r15*/
UNWIND_HINT_REGS
+ .if \save_ret
+ pushq %rsi /* return address on top of stack */
+ .endif
.endm
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
@@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0
#ifdef CONFIG_FRAME_POINTER
- .if \ptregs_offset
- leaq \ptregs_offset(%rsp), %rbp
- .else
- mov %rsp, %rbp
- .endif
- orq $0x1, %rbp
+ leaq 1+\ptregs_offset(%rsp), %rbp
#endif
.endm
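
Two notes on the calling.h changes above. Replacing "xorq %rN, %rN" with "xorl %rNd, %rNd" is behaviour-preserving: 32-bit operations on x86-64 zero-extend into the full 64-bit register, and the 32-bit form is the conventional register-zeroing idiom (on some microarchitectures it is also the cheaper one). Likewise, the new ENCODE_FRAME_POINTER folds the old "orq $0x1" into the leaq displacement; because %rsp plus the pt_regs offset is at least 8-byte aligned at that point, adding 1 sets exactly the marker bit the orq used to set. A minimal, hypothetical sketch of how the consumer side of that marker bit can look (illustrative names, not the kernel's unwinder):

	#include <stdint.h>

	struct pt_regs;	/* opaque here; defined in <asm/ptrace.h> in-kernel */

	/*
	 * Bit 0 set in a saved %rbp marks it as pointing at a pt_regs
	 * frame rather than at a normal (saved RBP, return address) pair.
	 */
	static inline struct pt_regs *decode_encoded_frame_pointer(uintptr_t bp)
	{
		if (!(bp & 0x1))
			return (struct pt_regs *)0;	/* ordinary frame pointer */
		return (struct pt_regs *)(bp & ~(uintptr_t)0x1);
	}
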
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c022540d..6ad064c8cf35 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %ebx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8971bd64d515..805f52703ee3 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -55,7 +55,7 @@ END(native_usergs_sysret64)
.macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, \flags /* interrupts off? */
+ btl $9, \flags /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %rbx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
@@ -449,9 +448,19 @@ END(irq_entries_start)
*
* The invariant is that, if irq_count != -1, then the IRQ stack is in use.
*/
-.macro ENTER_IRQ_STACK regs=1 old_rsp
+.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
DEBUG_ENTRY_ASSERT_IRQS_OFF
+
+ .if \save_ret
+ /*
+ * If save_ret is set, the original stack contains one additional
+ * entry -- the return address. Therefore, move the address one
+ * entry below %rsp to \old_rsp.
+ */
+ leaq 8(%rsp), \old_rsp
+ .else
movq %rsp, \old_rsp
+ .endif
.if \regs
UNWIND_HINT_REGS base=\old_rsp
@@ -497,6 +506,15 @@ END(irq_entries_start)
.if \regs
UNWIND_HINT_REGS indirect=1
.endif
+
+ .if \save_ret
+ /*
+ * Push the return address to the stack. This return address can
+ * be found at the "real" original RSP, which was offset by 8 at
+ * the beginning of this macro.
+ */
+ pushq -8(\old_rsp)
+ .endif
.endm
/*
@@ -520,27 +538,65 @@ END(irq_entries_start)
.endm
/*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers in fast path.
+ * Interrupt entry helper function.
*
- * Entry runs with interrupts off.
+ * Entry runs with interrupts off. Stack layout at entry:
+ * +----------------------------------------------------+
+ * | regs->ss |
+ * | regs->rsp |
+ * | regs->eflags |
+ * | regs->cs |
+ * | regs->ip |
+ * +----------------------------------------------------+
+ * | regs->orig_ax = ~(interrupt number) |
+ * +----------------------------------------------------+
+ * | return address |
+ * +----------------------------------------------------+
*/
-
-/* 0(%rsp): ~(interrupt number) */
- .macro interrupt func
+ENTRY(interrupt_entry)
+ UNWIND_HINT_FUNC
+ ASM_CLAC
cld
- testb $3, CS-ORIG_RAX(%rsp)
+ testb $3, CS-ORIG_RAX+8(%rsp)
jz 1f
SWAPGS
- call switch_to_thread_stack
+
+ /*
+ * Switch to the thread stack. The IRET frame and orig_ax are
+ * on the stack, as well as the return address. RDI..R12 are
+ * not (yet) on the stack and space has not (yet) been
+ * allocated for them.
+ */
+ pushq %rdi
+
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ /*
+ * We have RDI, return address, and orig_ax on the stack on
+ * top of the IRET frame. That means offset=24
+ */
+ UNWIND_HINT_IRET_REGS base=%rdi offset=24
+
+ pushq 7*8(%rdi) /* regs->ss */
+ pushq 6*8(%rdi) /* regs->rsp */
+ pushq 5*8(%rdi) /* regs->eflags */
+ pushq 4*8(%rdi) /* regs->cs */
+ pushq 3*8(%rdi) /* regs->ip */
+ pushq 2*8(%rdi) /* regs->orig_ax */
+ pushq 8(%rdi) /* return address */
+ UNWIND_HINT_FUNC
+
+ movq (%rdi), %rdi
1:
- PUSH_AND_CLEAR_REGS
- ENCODE_FRAME_POINTER
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
- testb $3, CS(%rsp)
+ testb $3, CS+8(%rsp)
jz 1f
/*
@@ -548,7 +604,7 @@ END(irq_entries_start)
*
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks). Since TRACE_IRQS_OFF idempotent,
+ * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
* the simplest way to handle it is to just call it twice if
* we enter from user mode. There's no reason to optimize this since
* TRACE_IRQS_OFF is a no-op if lockdep is off.
@@ -558,12 +614,15 @@ END(irq_entries_start)
CALL_enter_from_user_mode
1:
- ENTER_IRQ_STACK old_rsp=%rdi
+ ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
- call \func /* rdi points to pt_regs */
- .endm
+ ret
+END(interrupt_entry)
+
+
+/* Interrupt entry/exit. */
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
@@ -571,9 +630,10 @@ END(irq_entries_start)
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
- ASM_CLAC
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
- interrupt do_IRQ
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call do_IRQ /* rdi points to pt_regs */
/* 0(%rsp): old RSP */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -766,10 +826,11 @@ END(common_interrupt)
.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
UNWIND_HINT_IRET_REGS
- ASM_CLAC
pushq $~(\num)
.Lcommon_\sym:
- interrupt \do_sym
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call \do_sym /* rdi points to pt_regs */
jmp ret_from_intr
END(\sym)
.endm
@@ -832,34 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
-/*
- * Switch to the thread stack. This is called with the IRET frame and
- * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
- * space has not been allocated for them.)
- */
-ENTRY(switch_to_thread_stack)
- UNWIND_HINT_FUNC
-
- pushq %rdi
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
- movq %rsp, %rdi
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-
- pushq 7*8(%rdi) /* regs->ss */
- pushq 6*8(%rdi) /* regs->rsp */
- pushq 5*8(%rdi) /* regs->eflags */
- pushq 4*8(%rdi) /* regs->cs */
- pushq 3*8(%rdi) /* regs->ip */
- pushq 2*8(%rdi) /* regs->orig_ax */
- pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
-
- movq (%rdi), %rdi
- ret
-END(switch_to_thread_stack)
-
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -875,12 +908,8 @@ ENTRY(\sym)
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
- /* Save all registers in pt_regs */
- PUSH_AND_CLEAR_REGS
- ENCODE_FRAME_POINTER
-
.if \paranoid < 2
- testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
+ testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
jnz .Lfrom_usermode_switch_stack_\@
.endif
@@ -1130,13 +1159,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
/*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch gs if needed.
* Use slow, but surefire "are we in kernel?" check.
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
ENTRY(paranoid_entry)
UNWIND_HINT_FUNC
cld
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
rdmsr
@@ -1181,12 +1212,14 @@ ENTRY(paranoid_exit)
END(paranoid_exit)
/*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch GS if needed.
* Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
- UNWIND_HINT_REGS offset=8
+ UNWIND_HINT_FUNC
cld
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1577,8 +1610,6 @@ end_repeat_nmi:
* frame to point back to repeat_nmi.
*/
pushq $-1 /* ORIG_RAX: no syscall to restart */
- PUSH_AND_CLEAR_REGS
- ENCODE_FRAME_POINTER
/*
* Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e016e413..08425c42f8b7 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
- xorq %r11, %r11 /* nospec r11 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
- xorq %r12, %r12 /* nospec r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
- xorq %r13, %r13 /* nospec r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
- xorq %r14, %r14 /* nospec r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
- xorq %r15, %r15 /* nospec r15 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
- xorq %r11, %r11 /* nospec r11 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
- xorq %r12, %r12 /* nospec r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
- xorq %r13, %r13 /* nospec r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
- xorq %r14, %r14 /* nospec r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
- xorq %r15, %r15 /* nospec r15 */
+ xorl %r15d, %r15d /* nospec r15 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
*/
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r10
+ xorl %r8d, %r8d
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
swapgs
sysretl
END(entry_SYSCALL_compat)
@@ -347,36 +347,47 @@ ENTRY(entry_INT80_compat)
*/
movl %eax, %eax
+ /* switch to thread stack expects orig_ax and rdi to be pushed */
pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
- /* switch to thread stack expects orig_ax to be pushed */
- call switch_to_thread_stack
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- pushq %rdi /* pt_regs->di */
+ pushq 6*8(%rdi) /* regs->ss */
+ pushq 5*8(%rdi) /* regs->rsp */
+ pushq 4*8(%rdi) /* regs->eflags */
+ pushq 3*8(%rdi) /* regs->cs */
+ pushq 2*8(%rdi) /* regs->ip */
+ pushq 1*8(%rdi) /* regs->orig_ax */
+
+ pushq (%rdi) /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
- xorq %r11, %r11 /* nospec r11 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp */
pushq %r12 /* pt_regs->r12 */
- xorq %r12, %r12 /* nospec r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq %r13 /* pt_regs->r13 */
- xorq %r13, %r13 /* nospec r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq %r14 /* pt_regs->r14 */
- xorq %r14, %r14 /* nospec r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
- xorq %r15, %r15 /* nospec r15 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
@@ -393,15 +404,3 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_ON
jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)
-
-ENTRY(stub32_clone)
- /*
- * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
- * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
- *
- * The native 64-bit kernel's sys_clone() implements the latter,
- * so we need to swap arguments here before calling it:
- */
- xchg %r8, %rcx
- jmp sys_clone
-ENDPROC(stub32_clone)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 448ac2161112..2a5e99cff859 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -8,12 +8,12 @@
#
0 i386 restart_syscall sys_restart_syscall
1 i386 exit sys_exit
-2 i386 fork sys_fork sys_fork
+2 i386 fork sys_fork
3 i386 read sys_read
4 i386 write sys_write
5 i386 open sys_open compat_sys_open
6 i386 close sys_close
-7 i386 waitpid sys_waitpid sys32_waitpid
+7 i386 waitpid sys_waitpid compat_sys_x86_waitpid
8 i386 creat sys_creat
9 i386 link sys_link
10 i386 unlink sys_unlink
@@ -78,7 +78,7 @@
69 i386 ssetmask sys_ssetmask
70 i386 setreuid sys_setreuid16
71 i386 setregid sys_setregid16
-72 i386 sigsuspend sys_sigsuspend sys_sigsuspend
+72 i386 sigsuspend sys_sigsuspend
73 i386 sigpending sys_sigpending compat_sys_sigpending
74 i386 sethostname sys_sethostname
75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
@@ -96,7 +96,7 @@
87 i386 swapon sys_swapon
88 i386 reboot sys_reboot
89 i386 readdir sys_old_readdir compat_sys_old_readdir
-90 i386 mmap sys_old_mmap sys32_mmap
+90 i386 mmap sys_old_mmap compat_sys_x86_mmap
91 i386 munmap sys_munmap
92 i386 truncate sys_truncate compat_sys_truncate
93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
@@ -126,7 +126,7 @@
117 i386 ipc sys_ipc compat_sys_ipc
118 i386 fsync sys_fsync
119 i386 sigreturn sys_sigreturn sys32_sigreturn
-120 i386 clone sys_clone stub32_clone
+120 i386 clone sys_clone compat_sys_x86_clone
121 i386 setdomainname sys_setdomainname
122 i386 uname sys_newuname
123 i386 modify_ldt sys_modify_ldt
@@ -186,8 +186,8 @@
177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
179 i386 rt_sigsuspend sys_rt_sigsuspend
-180 i386 pread64 sys_pread64 sys32_pread
-181 i386 pwrite64 sys_pwrite64 sys32_pwrite
+180 i386 pread64 sys_pread64 compat_sys_x86_pread
+181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite
182 i386 chown sys_chown16
183 i386 getcwd sys_getcwd
184 i386 capget sys_capget
@@ -196,14 +196,14 @@
187 i386 sendfile sys_sendfile compat_sys_sendfile
188 i386 getpmsg
189 i386 putpmsg
-190 i386 vfork sys_vfork sys_vfork
+190 i386 vfork sys_vfork
191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
192 i386 mmap2 sys_mmap_pgoff
-193 i386 truncate64 sys_truncate64 sys32_truncate64
-194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
-195 i386 stat64 sys_stat64 sys32_stat64
-196 i386 lstat64 sys_lstat64 sys32_lstat64
-197 i386 fstat64 sys_fstat64 sys32_fstat64
+193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64
+194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64
+195 i386 stat64 sys_stat64 compat_sys_x86_stat64
+196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64
+197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64
198 i386 lchown32 sys_lchown
199 i386 getuid32 sys_getuid
200 i386 getgid32 sys_getgid
@@ -231,7 +231,7 @@
# 222 is unused
# 223 is unused
224 i386 gettid sys_gettid
-225 i386 readahead sys_readahead sys32_readahead
+225 i386 readahead sys_readahead compat_sys_x86_readahead
226 i386 setxattr sys_setxattr
227 i386 lsetxattr sys_lsetxattr
228 i386 fsetxattr sys_fsetxattr
@@ -256,7 +256,7 @@
247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
248 i386 io_submit sys_io_submit compat_sys_io_submit
249 i386 io_cancel sys_io_cancel
-250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
+250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
252 i386 exit_group sys_exit_group
253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
@@ -278,7 +278,7 @@
269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
270 i386 tgkill sys_tgkill
271 i386 utimes sys_utimes compat_sys_utimes
-272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
+272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64
273 i386 vserver
274 i386 mbind sys_mbind
275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
@@ -306,7 +306,7 @@
297 i386 mknodat sys_mknodat
298 i386 fchownat sys_fchownat
299 i386 futimesat sys_futimesat compat_sys_futimesat
-300 i386 fstatat64 sys_fstatat64 sys32_fstatat
+300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat
301 i386 unlinkat sys_unlinkat
302 i386 renameat sys_renameat
303 i386 linkat sys_linkat
@@ -320,7 +320,7 @@
311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
313 i386 splice sys_splice
-314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
+314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range
315 i386 tee sys_tee
316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
317 i386 move_pages sys_move_pages compat_sys_move_pages
@@ -330,7 +330,7 @@
321 i386 signalfd sys_signalfd compat_sys_signalfd
322 i386 timerfd_create sys_timerfd_create
323 i386 eventfd sys_eventfd
-324 i386 fallocate sys_fallocate sys32_fallocate
+324 i386 fallocate sys_fallocate compat_sys_x86_fallocate
325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 577fa8adb785..8560ef68a9d6 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,10 +42,8 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
-#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
- NATIVE;
-#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
+static enum { EMULATE, NONE } vsyscall_mode =
+#ifdef CONFIG_LEGACY_VSYSCALL_NONE
NONE;
#else
EMULATE;
@@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
if (str) {
if (!strcmp("emulate", str))
vsyscall_mode = EMULATE;
- else if (!strcmp("native", str))
- vsyscall_mode = NATIVE;
else if (!strcmp("none", str))
vsyscall_mode = NONE;
else
@@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
WARN_ON_ONCE(address != regs->ip);
- /* This should be unreachable in NATIVE mode. */
- if (WARN_ON(vsyscall_mode == NATIVE))
- return false;
-
if (vsyscall_mode == NONE) {
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall attempted with vsyscall=none");
@@ -370,9 +362,7 @@ void __init map_vsyscall(void)
if (vsyscall_mode != NONE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
- vsyscall_mode == NATIVE
- ? PAGE_KERNEL_VSYSCALL
- : PAGE_KERNEL_VVAR);
+ PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 6d8044ab1060..22ec65bc033a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3606,7 +3606,7 @@ static struct intel_uncore_type skx_uncore_imc = {
};
static struct attribute *skx_upi_uncore_formats_attr[] = {
- &format_attr_event_ext.attr,
+ &format_attr_event.attr,
&format_attr_umask_ext.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 96cd33bbfc85..6512498bbef6 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -51,15 +51,14 @@
#define AA(__x) ((unsigned long)(__x))
-asmlinkage long sys32_truncate64(const char __user *filename,
- unsigned long offset_low,
- unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename,
+ unsigned long, offset_low, unsigned long, offset_high)
{
return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
}
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
- unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd,
+ unsigned long, offset_low, unsigned long, offset_high)
{
return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
}
@@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
return 0;
}
-asmlinkage long sys32_stat64(const char __user *filename,
- struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename,
+ struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_stat(filename, &stat);
@@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename,
return ret;
}
-asmlinkage long sys32_lstat64(const char __user *filename,
- struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename,
+ struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_lstat(filename, &stat);
@@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename,
return ret;
}
-asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd,
+ struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_fstat(fd, &stat);
@@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
return ret;
}
-asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename,
- struct stat64 __user *statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd,
+ const char __user *, filename,
+ struct stat64 __user *, statbuf, int, flag)
{
struct kstat stat;
int error;
@@ -153,7 +154,7 @@ struct mmap_arg_struct32 {
unsigned int offset;
};
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
+COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)
{
struct mmap_arg_struct32 a;
@@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
a.offset>>PAGE_SHIFT);
}
-asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
- int options)
+COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *,
+ stat_addr, int, options)
{
return compat_sys_wait4(pid, stat_addr, options, NULL);
}
/* warning: next two assume little endian */
-asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
- u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf,
+ u32, count, u32, poslo, u32, poshi)
{
return sys_pread64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
}
-asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
- u32 count, u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf,
+ u32, count, u32, poslo, u32, poshi)
{
return sys_pwrite64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
@@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
* Some system calls that need sign extended arguments. This could be
* done by a generic wrapper.
*/
-long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
- __u32 len_low, __u32 len_high, int advice)
+COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low,
+ __u32, offset_high, __u32, len_low, __u32, len_high,
+ int, advice)
{
return sys_fadvise64_64(fd,
(((u64)offset_high)<<32) | offset_low,
@@ -202,31 +204,43 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
advice);
}
-asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
- size_t count)
+COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo,
+ unsigned int, off_hi, size_t, count)
{
return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
}
-asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
- unsigned n_low, unsigned n_hi, int flags)
+COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low,
+ unsigned int, off_hi, unsigned int, n_low,
+ unsigned int, n_hi, int, flags)
{
return sys_sync_file_range(fd,
((u64)off_hi << 32) | off_low,
((u64)n_hi << 32) | n_low, flags);
}
-asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
- size_t len, int advice)
+COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo,
+ unsigned int, offset_hi, size_t, len, int, advice)
{
return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
len, advice);
}
-asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
- unsigned offset_hi, unsigned len_lo,
- unsigned len_hi)
+COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode,
+ unsigned int, offset_lo, unsigned int, offset_hi,
+ unsigned int, len_lo, unsigned int, len_hi)
{
return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
((u64)len_hi << 32) | len_lo);
}
+
+/*
+ * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS
+ */
+COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
+ unsigned long, newsp, int __user *, parent_tidptr,
+ unsigned long, tls_val, int __user *, child_tidptr)
+{
+ return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr,
+ tls_val);
+}
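
The converted wrappers above all share one pattern: 32-bit userspace passes 64-bit quantities (file offsets, lengths) as two 32-bit halves, and the compat entry point reassembles them before handing off to the native syscall; switching them to COMPAT_SYSCALL_DEFINEn routes them through the common compat wrapper machinery instead of open-coded asmlinkage prototypes. A standalone sketch of the hi/lo recombination (plain C, not kernel code):

	#include <stdint.h>

	/*
	 * Rebuild a 64-bit offset from the two 32-bit halves a 32-bit
	 * caller passes, mirroring ((loff_t)offset_high << 32) | offset_low
	 * in the wrappers above.
	 */
	static inline int64_t compat_offset(uint32_t offset_high, uint32_t offset_low)
	{
		return (int64_t)(((uint64_t)offset_high << 32) | offset_low);
	}
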
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 4d4015ddcf26..c356098b6fb9 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,6 +7,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
+#include <asm/nospec-branch.h>
+
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
}
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
return error;
}
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d111616524b..1908214b9125 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
INDIRECT_THUNK(si)
INDIRECT_THUNK(di)
INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 3fa039855b8f..9f645ba57dbb 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -78,7 +78,7 @@ set_bit(long nr, volatile unsigned long *addr)
: "iq" ((u8)CONST_MASK(nr))
: "memory");
} else {
- asm volatile(LOCK_PREFIX "bts %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
}
}
@@ -94,7 +94,7 @@ set_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
}
/**
@@ -115,7 +115,7 @@ clear_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)~CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btr %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -137,7 +137,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
}
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -182,7 +182,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
*/
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
}
/**
@@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btc %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -217,7 +217,8 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -246,7 +247,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
{
bool oldbit;
- asm("bts %2,%1"
+ asm(__ASM_SIZE(bts) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -263,7 +264,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
*/
static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -286,7 +288,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
{
bool oldbit;
- asm volatile("btr %2,%1"
+ asm volatile(__ASM_SIZE(btr) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -298,7 +300,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
{
bool oldbit;
- asm volatile("btc %2,%1"
+ asm volatile(__ASM_SIZE(btc) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr) : "memory");
@@ -316,7 +318,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
*/
static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc),
+ *addr, "Ir", nr, "%0", c);
}
static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
@@ -329,7 +332,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
{
bool oldbit;
- asm volatile("bt %2,%1"
+ asm volatile(__ASM_SIZE(bt) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0dfe4d3f74e2..d554c11e01ff 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,7 @@
#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -315,6 +316,7 @@
#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
@@ -327,6 +329,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 85f6ccb80b91..a399c1ebf6f0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -6,6 +6,7 @@
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/tlb.h>
+#include <asm/nospec-branch.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -36,8 +37,18 @@
extern asmlinkage unsigned long efi_call_phys(void *, ...);
-#define arch_efi_call_virt_setup() kernel_fpu_begin()
-#define arch_efi_call_virt_teardown() kernel_fpu_end()
+#define arch_efi_call_virt_setup() \
+({ \
+ kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
+})
+
+#define arch_efi_call_virt_teardown() \
+({ \
+ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+})
+
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
\
if (efi_scratch.use_pgd) { \
efi_scratch.prev_cr3 = __read_cr3(); \
@@ -91,6 +103,7 @@ struct efi_scratch {
__flush_tlb_all(); \
} \
\
+ firmware_restrict_branch_speculation_end(); \
__kernel_fpu_end(); \
preempt_enable(); \
})
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd6f57a54a26..b605a5b6a30c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -507,6 +507,7 @@ struct kvm_vcpu_arch {
u64 smi_count;
bool tpr_access_reporting;
u64 ia32_xss;
+ u64 microcode_version;
/*
* Paging state of the vcpu
@@ -1095,6 +1096,8 @@ struct kvm_x86_ops {
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+
+ int (*get_msr_feature)(struct kvm_msr_entry *entry);
};
struct kvm_arch_async_pf {
@@ -1464,7 +1467,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
-
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 55520cec8b27..6cf0e4cb7b97 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -37,7 +37,13 @@ struct cpu_signature {
struct device;
-enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+enum ucode_state {
+ UCODE_OK = 0,
+ UCODE_NEW,
+ UCODE_UPDATED,
+ UCODE_NFOUND,
+ UCODE_ERROR,
+};
struct microcode_ops {
enum ucode_state (*request_microcode_user) (int cpu,
@@ -54,7 +60,7 @@ struct microcode_ops {
* are being called.
* See also the "Synchronization" section in microcode_core.c.
*/
- int (*apply_microcode) (int cpu);
+ enum ucode_state (*apply_microcode) (int cpu);
int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
};
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c931b88982a0..1de72ce514cd 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
#else
BUG();
+ return (void *)fix_to_virt(FIX_HOLE);
#endif
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 81a1be326571..f928ad9b143f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
+
#ifdef __ASSEMBLY__
/*
@@ -24,6 +68,18 @@
.endm
/*
+ * This should be used immediately before an indirect jump/call. It tells
+ * objtool the subsequent indirect jump/call is vouched safe for retpoline
+ * builds.
+ */
+.macro ANNOTATE_RETPOLINE_SAFE
+ .Lannotate_\@:
+ .pushsection .discard.retpoline_safe
+ _ASM_PTR .Lannotate_\@
+ .popsection
+.endm
+
+/*
* These are the bare retpoline primitives for indirect jmp and call.
* Do not use these directly; they only exist to make the ALTERNATIVE
* invocation below less ugly.
@@ -59,9 +115,9 @@
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(jmp *\reg), \
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
#else
jmp *\reg
#endif
@@ -70,18 +126,25 @@
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(call *\reg), \
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
- __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
#else
call *\reg
#endif
.endm
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "", "call __clear_rsb", \ftr
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
#endif
.endm
@@ -93,6 +156,12 @@
".long 999b - .\n\t" \
".popsection\n\t"
+#define ANNOTATE_RETPOLINE_SAFE \
+ "999:\n\t" \
+ ".pushsection .discard.retpoline_safe\n\t" \
+ _ASM_PTR " 999b\n\t" \
+ ".popsection\n\t"
+
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
/*
@@ -102,6 +171,7 @@
# define CALL_NOSPEC \
ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE( \
+ ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
"call __x86_indirect_thunk_%V[thunk_target]\n", \
X86_FEATURE_RETPOLINE)
@@ -113,7 +183,10 @@
* otherwise we'll run out of registers. We don't care about CET
* here, anyway.
*/
-# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
+# define CALL_NOSPEC \
+ ALTERNATIVE( \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
" jmp 904f;\n" \
" .align 16\n" \
"901: call 903f;\n" \
@@ -156,26 +229,54 @@ extern char __indirect_thunk_end[];
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- alternative_input("",
- "call __fill_rsb",
- X86_FEATURE_RETPOLINE,
- ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+ unsigned long loops;
+
+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
+#define alternative_msr_write(_msr, _val, _feature) \
+ asm volatile(ALTERNATIVE("", \
+ "movl %[msr], %%ecx\n\t" \
+ "movl %[val], %%eax\n\t" \
+ "movl $0, %%edx\n\t" \
+ "wrmsr", \
+ _feature) \
+ : : [msr] "i" (_msr), [val] "i" (_val) \
+ : "eax", "ecx", "edx", "memory")
+
static inline void indirect_branch_prediction_barrier(void)
{
- asm volatile(ALTERNATIVE("",
- "movl %[msr], %%ecx\n\t"
- "movl %[val], %%eax\n\t"
- "movl $0, %%edx\n\t"
- "wrmsr",
- X86_FEATURE_USE_IBPB)
- : : [msr] "i" (MSR_IA32_PRED_CMD),
- [val] "i" (PRED_CMD_IBPB)
- : "eax", "ecx", "edx", "memory");
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+ X86_FEATURE_USE_IBPB);
}
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ *
+ * (Implemented as CPP macros due to header hell.)
+ */
+#define firmware_restrict_branch_speculation_start() \
+do { \
+ preempt_disable(); \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
+ X86_FEATURE_USE_IBRS_FW); \
+} while (0)
+
+#define firmware_restrict_branch_speculation_end() \
+do { \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
+ X86_FEATURE_USE_IBRS_FW); \
+ preempt_enable(); \
+} while (0)
+
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 554841fab717..c83a2f418cea 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -7,6 +7,7 @@
#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>
+#include <asm/nospec-branch.h>
#include <asm/paravirt_types.h>
@@ -879,23 +880,27 @@ extern void default_banner(void);
#define INTERRUPT_RETURN \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+ ANNOTATE_RETPOLINE_SAFE; \
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define ENABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
#else /* !CONFIG_X86_32 */
@@ -917,21 +922,25 @@ extern void default_banner(void);
*/
#define SWAPGS \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
+ ANNOTATE_RETPOLINE_SAFE; \
+ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
)
#define GET_CR2_INTO_RAX \
- call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
+ ANNOTATE_RETPOLINE_SAFE; \
+ call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+ ANNOTATE_RETPOLINE_SAFE; \
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f624f1f10316..180bc0bff0fb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -43,6 +43,7 @@
#include <asm/desc_defs.h>
#include <asm/kmap_types.h>
#include <asm/pgtable_types.h>
+#include <asm/nospec-branch.h>
struct page;
struct thread_struct;
@@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
* offset into the paravirt_patch_template structure, and can therefore be
* freely converted back into a structure offset.
*/
-#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
+#define PARAVIRT_CALL \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%c[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ba3c523aaf16..a06b07399d17 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
{
bool oldbit;
- asm volatile("bt "__percpu_arg(2)",%1"
+ asm volatile("btl "__percpu_arg(2)",%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 63c2552b6b65..b444d83cfc95 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
{
pmdval_t v = native_pmd_val(pmd);
- return __pmd(v | set);
+ return native_make_pmd(v | set);
}
static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
{
pmdval_t v = native_pmd_val(pmd);
- return __pmd(v & ~clear);
+ return native_make_pmd(v & ~clear);
}
static inline pmd_t pmd_mkold(pmd_t pmd)
@@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
{
pudval_t v = native_pud_val(pud);
- return __pud(v | set);
+ return native_make_pud(v | set);
}
static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
{
pudval_t v = native_pud_val(pud);
- return __pud(v & ~clear);
+ return native_make_pud(v & ~clear);
}
static inline pud_t pud_mkold(pud_t pud)
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index e55466760ff8..b3ec519e3982 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
void paging_init(void);
+void sync_initial_page_table(void);
/*
* Define this if things work differently on an i386 and an i486:
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 81462e9a34f6..1149d2112b2e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[];
#define swapper_pg_dir init_top_pgt
extern void paging_init(void);
+static inline void sync_initial_page_table(void) { }
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %p(%016lx)\n", \
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3696398a9475..acfe755562a6 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -174,7 +174,6 @@ enum page_cache_mode {
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -206,7 +205,6 @@ enum page_cache_mode {
#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
@@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud)
#else
#include <asm-generic/pgtable-nopud.h>
+static inline pud_t native_make_pud(pudval_t val)
+{
+ return (pud_t) { .p4d.pgd = native_make_pgd(val) };
+}
+
static inline pudval_t native_pud_val(pud_t pud)
{
return native_pgd_val(pud.p4d.pgd);
@@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
#else
#include <asm-generic/pgtable-nopmd.h>
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+ return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) };
+}
+
static inline pmdval_t native_pmd_val(pmd_t pmd)
{
return native_pgd_val(pmd.pud.p4d.pgd);
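
The pgtable.h hunks above rely on native_make_pmd()/native_make_pud() also existing for the non-folded configurations; those definitions are not part of the hunks shown here, but they presumably mirror native_pmd_val()/native_pud_val() as plain value wrappers that bypass the paravirt __pmd()/__pud() hooks, roughly:

	/* Assumed shape of the unfolded constructors (CONFIG_PGTABLE_LEVELS > 2/3). */
	static inline pmd_t native_make_pmd(pmdval_t val)
	{
		return (pmd_t) { val };
	}

	static inline pud_t native_make_pud(pudval_t val)
	{
		return (pud_t) { val };
	}
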
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1bd9ed87606f..b0ccd4847a58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -977,4 +977,5 @@ bool xen_set_default_idle(void);
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
+void microcode_check(void);
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4e44250e7d0d..4cf11d88d3b3 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -17,7 +17,7 @@
#define _REFCOUNT_EXCEPTION \
".pushsection .text..refcount\n" \
"111:\tlea %[counter], %%" _ASM_CX "\n" \
- "112:\t" ASM_UD0 "\n" \
+ "112:\t" ASM_UD2 "\n" \
ASM_UNREACHABLE \
".popsection\n" \
"113:\n" \
@@ -67,13 +67,13 @@ static __always_inline __must_check
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, "er", i, "%0", e);
+ r->refs.counter, "er", i, "%0", e, "cx");
}
static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, "%0", e);
+ r->refs.counter, "%0", e, "cx");
}
static __always_inline __must_check
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index f91c365e57c3..4914a3e7c803 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -2,8 +2,7 @@
#ifndef _ASM_X86_RMWcc
#define _ASM_X86_RMWcc
-#define __CLOBBERS_MEM "memory"
-#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx"
+#define __CLOBBERS_MEM(clb...) "memory", ## clb
#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
@@ -40,18 +39,19 @@ do { \
#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
- __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \
- __CLOBBERS_MEM_CC_CX)
+ __CLOBBERS_MEM(clobbers))
#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \
- __CLOBBERS_MEM, vcon (val))
+ __CLOBBERS_MEM(), vcon (val))
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \
+ clobbers...) \
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \
- __CLOBBERS_MEM_CC_CX, vcon (val))
+ __CLOBBERS_MEM(clobbers), vcon (val))
#endif /* _ASM_X86_RMWcc */
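
The refcount.h callers above now pass their extra clobber ("cx") explicitly, and plain users get a bare "memory" clobber instead of the old blanket "memory", "cc", "cx" list. The trick is the GNU ", ## args" paste, which drops the leading comma when the variadic list is empty; a toy, stand-alone illustration:

	/* Toy demonstration of the ", ## clb" GNU extension used by __CLOBBERS_MEM(). */
	#define CLOBBERS(clb...)	"memory", ## clb

	static const char *just_mem[]   = { CLOBBERS() };		/* { "memory" } */
	static const char *with_extra[] = { CLOBBERS("cc", "cx") };	/* { "memory", "cc", "cx" } */
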
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index d6baf23782bc..5c019d23d06b 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[];
#if defined(CONFIG_X86_64)
extern char __end_rodata_hpage_align[];
+extern char __entry_trampoline_start[], __entry_trampoline_end[];
#endif
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 82c34ee25a65..906794aa034e 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -20,31 +20,43 @@
#include <asm/ia32.h>
/* ia32/sys_ia32.c */
-asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long);
-asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
+asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long,
+ unsigned long);
+asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long,
+ unsigned long);
-asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
-asmlinkage long sys32_fstatat(unsigned int, const char __user *,
+asmlinkage long compat_sys_x86_stat64(const char __user *,
+ struct stat64 __user *);
+asmlinkage long compat_sys_x86_lstat64(const char __user *,
+ struct stat64 __user *);
+asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *);
+asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *,
struct stat64 __user *, int);
struct mmap_arg_struct32;
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
+asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *);
-asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
+asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *,
+ int);
-asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
-asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
+asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32,
+ u32);
+asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32,
+ u32, u32);
-long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
-long sys32_vm86_warning(void);
+asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32,
+ int);
-asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
-asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
- unsigned, unsigned, int);
-asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
-asmlinkage long sys32_fallocate(int, int, unsigned,
- unsigned, unsigned, unsigned);
+asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int,
+ size_t);
+asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int,
+ unsigned int, unsigned int,
+ int);
+asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int,
+ size_t, int);
+asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int,
+ unsigned int, unsigned int);
+asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *,
+ unsigned long, int __user *);
/* ia32/ia32_signal.c */
asmlinkage long sys32_sigreturn(void);
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 7a2ade4aa235..6cfa9c8cb7d6 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -26,6 +26,7 @@
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_PV_TLB_FLUSH 9
+#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 91723461dc1f..435db58a7bad 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -30,6 +30,7 @@ struct mce {
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
__u64 ppin; /* Protected Processor Inventory Number */
+ __u32 microcode;/* Microcode revision */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8ad2e410974f..7c5538769f7e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)
do {
rep_nop();
now = rdtsc();
- } while ((now - start) < 40000000000UL / HZ &&
+ } while ((now - start) < 40000000000ULL / HZ &&
time_before_eq(jiffies, end));
}
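
On a 32-bit build, 40000000000 does not fit in unsigned long, so the UL-suffixed constant draws a "constant too large" warning and its effective type becomes compiler-dependent; the ULL suffix makes the intended 64-bit arithmetic explicit on both word sizes. A stand-alone illustration (the HZ value is picked arbitrarily here):

	#include <stdio.h>

	int main(void)
	{
		/* Explicit ULL keeps the 40e9-cycle budget 64-bit even when
		 * unsigned long is only 32 bits wide (ILP32 targets). */
		unsigned long long budget = 40000000000ULL / 250;	/* HZ assumed to be 250 */

		printf("sizeof(unsigned long) = %zu, budget = %llu cycles\n",
		       sizeof(unsigned long), budget);
		return 0;
	}
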
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d71c8b54b696..bfca937bdcc3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -300,6 +300,15 @@ retpoline_auto:
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
+
+ /*
+ * Retpoline means the kernel is safe because it has no indirect
+ * branches. But firmware isn't, so use IBRS to protect that.
+ */
+ if (boot_cpu_has(X86_FEATURE_IBRS)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+ pr_info("Enabling Restricted Speculation for firmware calls\n");
+ }
}
#undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
- return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
spectre_v2_module_string());
}
#endif
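
X86_FEATURE_USE_IBRS_FW does not leave IBRS enabled permanently; the companion <asm/nospec-branch.h> and <asm/efi.h> changes in this series bracket firmware calls so that SPEC_CTRL.IBRS is only set while firmware code runs. A simplified sketch of that bracketing (the real helpers are alternative-patched on the new cap rather than tested at runtime):

	static void efi_call_with_ibrs(void (*efi_fn)(void))
	{
		preempt_disable();
		if (boot_cpu_has(X86_FEATURE_USE_IBRS_FW))
			native_wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS);	/* IBRS on */

		efi_fn();		/* firmware runs with restricted speculation */

		if (boot_cpu_has(X86_FEATURE_USE_IBRS_FW))
			native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);			/* IBRS off */
		preempt_enable();
	}
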
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 824aee0117bb..348cf4821240 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)
return 0;
}
core_initcall(init_cpu_syscore);
+
+/*
+ * The microcode loader calls this upon late microcode load to recheck features,
+ * only when microcode has been updated. Caller holds microcode_mutex and CPU
+ * hotplug lock.
+ */
+void microcode_check(void)
+{
+ struct cpuinfo_x86 info;
+
+ perf_check_microcode();
+
+ /* Reload CPUID max function as it might've changed. */
+ info.cpuid_level = cpuid_eax(0);
+
+ /*
+ * Copy all capability leafs to pick up the synthetic ones so that
+ * memcmp() below doesn't fail on that. The ones coming from CPUID will
+ * get overwritten in get_cpu_cap().
+ */
+ memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
+
+ get_cpu_cap(&info);
+
+ if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
+ return;
+
+ pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
+ pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
+}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d19e903214b4..c3af167d0a70 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
/*
* Early microcode releases for the Spectre v2 mitigation were broken.
* Information taken from;
- * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
* - https://kb.vmware.com/s/article/52345
* - Microcode revisions observed in the wild
* - Release note from 20180108 microcode release
@@ -123,7 +123,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
{ INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
{ INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
{ INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
- { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
{ INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
{ INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
{ INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
@@ -144,6 +143,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
{
int i;
+ /*
+ * We know that hypervisors lie to us about the microcode version, so
+ * we may as well hope that they are running the correct version.
+ */
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return false;
+
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
c->x86_stepping == spectre_bad_microcodes[i].stepping)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8ff94d1e2dce..466f47301334 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -56,6 +56,9 @@
static DEFINE_MUTEX(mce_log_mutex);
+/* sysfs synchronization */
+static DEFINE_MUTEX(mce_sysfs_mutex);
+
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -130,6 +133,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
+
+ m->microcode = boot_cpu_data.microcode;
}
DEFINE_PER_CPU(struct mce, injectm);
@@ -262,7 +267,7 @@ static void __print_mce(struct mce *m)
*/
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
- cpu_data(m->extcpu).microcode);
+ m->microcode);
}
static void print_mce(struct mce *m)
@@ -2086,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
@@ -2098,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s,
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2110,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
@@ -2121,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s,
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2128,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s,
struct device_attribute *attr,
const char *buf, size_t size)
{
- ssize_t ret = device_store_int(s, attr, buf, size);
+ unsigned long old_check_interval = check_interval;
+ ssize_t ret = device_store_ulong(s, attr, buf, size);
+
+ if (check_interval == old_check_interval)
+ return ret;
+
+ if (check_interval < 1)
+ check_interval = 1;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
+
return ret;
}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 330b8462d426..48179928ff38 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
return -EINVAL;
ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
- if (ret != UCODE_OK)
+ if (ret > UCODE_UPDATED)
return -EINVAL;
return 0;
@@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
return patch_size;
}
-static int apply_microcode_amd(int cpu)
+static enum ucode_state apply_microcode_amd(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_amd *mc_amd;
@@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
p = find_patch(cpu);
if (!p)
- return 0;
+ return UCODE_NFOUND;
mc_amd = p->data;
uci->mc = p->data;
@@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
uci->cpu_sig.rev = rev;
- return 0;
+ return UCODE_OK;
}
if (__apply_microcode_amd(mc_amd)) {
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
- return -1;
+ return UCODE_ERROR;
}
pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
mc_amd->hdr.patch_id);
@@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
uci->cpu_sig.rev = mc_amd->hdr.patch_id;
c->microcode = mc_amd->hdr.patch_id;
- return 0;
+ return UCODE_UPDATED;
}
static int install_equiv_cpu_table(const u8 *buf)
@@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
static enum ucode_state
load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
{
+ struct ucode_patch *p;
enum ucode_state ret;
/* free old equiv table */
free_equiv_cpu_table();
ret = __load_microcode_amd(family, data, size);
-
- if (ret != UCODE_OK)
+ if (ret != UCODE_OK) {
cleanup();
+ return ret;
+ }
-#ifdef CONFIG_X86_32
- /* save BSP's matching patch for early load */
- if (save) {
- struct ucode_patch *p = find_patch(0);
- if (p) {
- memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
- memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
- PATCH_MAX_SIZE));
- }
+ p = find_patch(0);
+ if (!p) {
+ return ret;
+ } else {
+ if (boot_cpu_data.microcode == p->patch_id)
+ return ret;
+
+ ret = UCODE_NEW;
}
-#endif
+
+ /* save BSP's matching patch for early load */
+ if (!save)
+ return ret;
+
+ memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
+ memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE));
+
return ret;
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 319dd65f98a2..10c4fc2c91f8 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -22,13 +22,16 @@
#define pr_fmt(fmt) "microcode: " fmt
#include <linux/platform_device.h>
+#include <linux/stop_machine.h>
#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/firmware.h>
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
+#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/mm.h>
@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
*/
static DEFINE_MUTEX(microcode_mutex);
+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
struct cpu_info_ctx {
@@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu)
return ret;
}
-struct apply_microcode_ctx {
- int err;
-};
-
static void apply_microcode_local(void *arg)
{
- struct apply_microcode_ctx *ctx = arg;
+ enum ucode_state *err = arg;
- ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+ *err = microcode_ops->apply_microcode(smp_processor_id());
}
static int apply_microcode_on_target(int cpu)
{
- struct apply_microcode_ctx ctx = { .err = 0 };
+ enum ucode_state err;
int ret;
- ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
- if (!ret)
- ret = ctx.err;
-
+ ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1);
+ if (!ret) {
+ if (err == UCODE_ERROR)
+ ret = 1;
+ }
return ret;
}
@@ -489,31 +494,124 @@ static void __exit microcode_dev_exit(void)
/* fake device for request_firmware */
static struct platform_device *microcode_pdev;
-static int reload_for_cpu(int cpu)
+/*
+ * Late loading dance. Why the heavy-handed stomp_machine effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ * is loading microcode in order to avoid any negative interactions caused by
+ * the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ * requirement can be relaxed in the future. Right now, this is conservative
+ * and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
+static int check_online_cpus(void)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- enum ucode_state ustate;
- int err = 0;
+ if (num_online_cpus() == num_present_cpus())
+ return 0;
- if (!uci->valid)
- return err;
+ pr_err("Not all CPUs online, aborting microcode update.\n");
- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
- if (ustate == UCODE_OK)
- apply_microcode_on_target(cpu);
- else
- if (ustate == UCODE_ERROR)
- err = -EINVAL;
- return err;
+ return -EINVAL;
+}
+
+static atomic_t late_cpus_in;
+static atomic_t late_cpus_out;
+
+static int __wait_for_cpus(atomic_t *t, long long timeout)
+{
+ int all_cpus = num_online_cpus();
+
+ atomic_inc(t);
+
+ while (atomic_read(t) < all_cpus) {
+ if (timeout < SPINUNIT) {
+ pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+ all_cpus - atomic_read(t));
+ return 1;
+ }
+
+ ndelay(SPINUNIT);
+ timeout -= SPINUNIT;
+
+ touch_nmi_watchdog();
+ }
+ return 0;
+}
+
+/*
+ * Returns:
+ * < 0 - on error
+ * 0 - no update done
+ * 1 - microcode was updated
+ */
+static int __reload_late(void *info)
+{
+ int cpu = smp_processor_id();
+ enum ucode_state err;
+ int ret = 0;
+
+ /*
+ * Wait for all CPUs to arrive. A load will not be attempted unless all
+ * CPUs show up.
+ */
+ if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
+ return -1;
+
+ spin_lock(&update_lock);
+ apply_microcode_local(&err);
+ spin_unlock(&update_lock);
+
+ if (err > UCODE_NFOUND) {
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+ return -1;
+ /* siblings return UCODE_OK because their engine got updated already */
+ } else if (err == UCODE_UPDATED || err == UCODE_OK) {
+ ret = 1;
+ } else {
+ return ret;
+ }
+
+ /*
+ * Increase the wait timeout to a safe value here since we're
+ * serializing the microcode update and that could take a while on a
+ * large number of CPUs. And that is fine as the *actual* timeout will
+ * be determined by the last CPU finished updating and thus cut short.
+ */
+ if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
+ panic("Timeout during microcode update!\n");
+
+ return ret;
+}
+
+/*
+ * Reload microcode late on all CPUs. Wait for a sec until they
+ * all gather together.
+ */
+static int microcode_reload_late(void)
+{
+ int ret;
+
+ atomic_set(&late_cpus_in, 0);
+ atomic_set(&late_cpus_out, 0);
+
+ ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+ if (ret > 0)
+ microcode_check();
+
+ return ret;
}
static ssize_t reload_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t size)
{
+ enum ucode_state tmp_ret = UCODE_OK;
+ int bsp = boot_cpu_data.cpu_index;
unsigned long val;
- int cpu;
- ssize_t ret = 0, tmp_ret;
+ ssize_t ret = 0;
ret = kstrtoul(buf, 0, &val);
if (ret)
@@ -522,23 +620,24 @@ static ssize_t reload_store(struct device *dev,
if (val != 1)
return size;
+ tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
+ if (tmp_ret != UCODE_NEW)
+ return size;
+
get_online_cpus();
- mutex_lock(&microcode_mutex);
- for_each_online_cpu(cpu) {
- tmp_ret = reload_for_cpu(cpu);
- if (tmp_ret != 0)
- pr_warn("Error reloading microcode on CPU %d\n", cpu);
- /* save retval of the first encountered reload error */
- if (!ret)
- ret = tmp_ret;
- }
- if (!ret)
- perf_check_microcode();
+ ret = check_online_cpus();
+ if (ret)
+ goto put;
+
+ mutex_lock(&microcode_mutex);
+ ret = microcode_reload_late();
mutex_unlock(&microcode_mutex);
+
+put:
put_online_cpus();
- if (!ret)
+ if (ret >= 0)
ret = size;
return ret;
@@ -606,10 +705,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
if (system_state != SYSTEM_RUNNING)
return UCODE_NFOUND;
- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
- refresh_fw);
-
- if (ustate == UCODE_OK) {
+ ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, refresh_fw);
+ if (ustate == UCODE_NEW) {
pr_debug("CPU%d updated upon init\n", cpu);
apply_microcode_on_target(cpu);
}
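
With this rework the firmware image is requested once on the boot CPU and only a genuinely new patch (UCODE_NEW) reaches the stop_machine() rendezvous; the user-visible trigger is unchanged. A minimal userspace example of requesting a late reload:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Writing "1" asks the kernel to late-load microcode on all CPUs. */
		int fd = open("/sys/devices/system/cpu/microcode/reload", O_WRONLY);

		if (fd < 0 || write(fd, "1", 1) != 1) {
			perror("microcode reload");
			return 1;
		}
		close(fd);
		return 0;
	}
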
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index a15db2b4e0d6..32b8e5724f96 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (!mc)
return 0;
+ /*
+ * Save us the MSR write below - which is a particularly expensive
+ * operation - when the other hyperthread has updated the microcode
+ * already.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ return UCODE_OK;
+ }
+
+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
@@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
return 0;
}
-static int apply_microcode_intel(int cpu)
+static enum ucode_state apply_microcode_intel(int cpu)
{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_intel *mc;
- struct ucode_cpu_info *uci;
- struct cpuinfo_x86 *c;
static int prev_rev;
u32 rev;
/* We should bind the task to the CPU */
if (WARN_ON(raw_smp_processor_id() != cpu))
- return -1;
+ return UCODE_ERROR;
- uci = ucode_cpu_info + cpu;
- mc = uci->mc;
+ /* Look for a newer patch in our cache: */
+ mc = find_patch(uci);
if (!mc) {
- /* Look for a newer patch in our cache: */
- mc = find_patch(uci);
+ mc = uci->mc;
if (!mc)
- return 0;
+ return UCODE_NFOUND;
}
+ /*
+ * Save us the MSR write below - which is a particularly expensive
+ * operation - when the other hyperthread has updated the microcode
+ * already.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+ return UCODE_OK;
+ }
+
+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
@@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu)
if (rev != mc->hdr.rev) {
pr_err("CPU%d update to revision 0x%x failed\n",
cpu, mc->hdr.rev);
- return -1;
+ return UCODE_ERROR;
}
if (rev != prev_rev) {
@@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu)
prev_rev = rev;
}
- c = &cpu_data(cpu);
-
uci->cpu_sig.rev = rev;
c->microcode = rev;
- return 0;
+ return UCODE_UPDATED;
}
static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
@@ -830,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
unsigned int leftover = size;
unsigned int curr_mc_size = 0, new_mc_size = 0;
unsigned int csig, cpf;
+ enum ucode_state ret = UCODE_OK;
while (leftover) {
struct microcode_header_intel mc_header;
@@ -871,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
new_mc = mc;
new_mc_size = mc_size;
mc = NULL; /* trigger new vmalloc */
+ ret = UCODE_NEW;
}
ucode_ptr += mc_size;
@@ -900,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
- return UCODE_OK;
+ return ret;
}
static int get_ucode_fw(void *to, const void *from, size_t n)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 04a625f0fcda..0f545b3cf926 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -23,6 +23,7 @@
#include <asm/nops.h>
#include "../entry/calling.h"
#include <asm/export.h>
+#include <asm/nospec-branch.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -134,6 +135,7 @@ ENTRY(secondary_startup_64)
/* Ensure I am executing from virtual addresses */
movq $1f, %rax
+ ANNOTATE_RETPOLINE_SAFE
jmp *%rax
1:
UNWIND_HINT_EMPTY
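
ANNOTATE_RETPOLINE_SAFE tells objtool that this particular indirect jump is acceptable under CONFIG_RETPOLINE (the target was just loaded with a known kernel address). Judging by the <asm/nospec-branch.h> changes in this series, the annotation emits no instructions, only the address of the following branch into a discard section that objtool consumes, roughly:

	#define ANNOTATE_RETPOLINE_SAFE				\
		"999:\n\t"					\
		".pushsection .discard.retpoline_safe\n\t"	\
		_ASM_PTR " 999b\n\t"				\
		".popsection\n\t"
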
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 2f723301eb58..38deafebb21b 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -23,7 +23,7 @@
/*
* this changes the io permissions bitmap in the current task.
*/
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
{
struct thread_struct *t = &current->thread;
struct tss_struct *tss;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index bd36f3c33cd0..0715f827607c 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler);
bool arch_within_kprobe_blacklist(unsigned long addr)
{
+ bool is_in_entry_trampoline_section = false;
+
+#ifdef CONFIG_X86_64
+ is_in_entry_trampoline_section =
+ (addr >= (unsigned long)__entry_trampoline_start &&
+ addr < (unsigned long)__entry_trampoline_end);
+#endif
return (addr >= (unsigned long)__kprobes_text_start &&
addr < (unsigned long)__kprobes_text_end) ||
(addr >= (unsigned long)__entry_text_start &&
- addr < (unsigned long)__entry_text_end);
+ addr < (unsigned long)__entry_text_end) ||
+ is_in_entry_trampoline_section;
}
int __init arch_init_kprobes(void)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4e37d1a851a6..bc1a27280c4b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -49,7 +49,7 @@
static int kvmapf = 1;
-static int parse_no_kvmapf(char *arg)
+static int __init parse_no_kvmapf(char *arg)
{
kvmapf = 0;
return 0;
@@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
early_param("no-kvmapf", parse_no_kvmapf);
static int steal_acc = 1;
-static int parse_no_stealacc(char *arg)
+static int __init parse_no_stealacc(char *arg)
{
steal_acc = 0;
return 0;
@@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
static int kvmclock_vsyscall = 1;
-static int parse_no_kvmclock_vsyscall(char *arg)
+static int __init parse_no_kvmclock_vsyscall(char *arg)
{
kvmclock_vsyscall = 0;
return 0;
@@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
#endif
pa |= KVM_ASYNC_PF_ENABLED;
- /* Async page fault support for L1 hypervisor is optional */
- if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
- (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
- wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
+ pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+
+ wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id());
@@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)
{
int cpu;
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
GFP_KERNEL, cpu_to_node(cpu));
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1ae67e982af7..4c616be28506 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1204,20 +1204,13 @@ void __init setup_arch(char **cmdline_p)
kasan_init();
-#ifdef CONFIG_X86_32
- /* sync back kernel address range */
- clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
-
/*
- * sync back low identity map too. It is used for example
- * in the 32-bit EFI stub.
+ * Sync back kernel address range.
+ *
+ * FIXME: Can the later sync in setup_cpu_entry_areas() replace
+ * this call?
*/
- clone_pgd_range(initial_page_table,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
+ sync_initial_page_table();
tboot_probe();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 497aa766fab3..ea554f812ee1 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void)
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks();
-#ifdef CONFIG_X86_32
/*
* Sync back kernel address range again. We already did this in
* setup_arch(), but percpu data also needs to be available in
* the smpboot asm. We can't reliably pick up percpu mappings
* using vmalloc_fault(), because exception dispatch needs
* percpu data.
+ *
+ * FIXME: Can the later sync in setup_cpu_entry_areas() replace
+ * this call?
*/
- clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
-
- /*
- * sync back low identity map too. It is used for example
- * in the 32-bit EFI stub.
- */
- clone_pgd_range(initial_page_table,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
+ sync_initial_page_table();
}
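
Both call sites now funnel through sync_initial_page_table(): on 64-bit it is the empty inline added to pgtable_64.h above, and on 32-bit the new out-of-line function (added to arch/x86/mm/init_32.c per the diffstat) presumably carries the two clone_pgd_range() calls removed here, roughly:

	/* Sketch of the 32-bit body implied by the removed code. */
	void sync_initial_page_table(void)
	{
		clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
				KERNEL_PGD_PTRS);

		/* Sync the low identity map too; it is used e.g. by the 32-bit EFI stub. */
		clone_pgd_range(initial_page_table,
				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
				min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
	}
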
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index ac057f9b0763..0d930d8987cc 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -43,6 +43,13 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int));
#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name))
+ BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_code) != 8);
+
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_signo) != 0);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_errno) != 4);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_code) != 8);
/*
* Ensure that the size of each si_field never changes.
* If it does, it is a sign that the
@@ -63,36 +70,94 @@ static inline void signal_compat_build_tests(void)
CHECK_CSI_SIZE (_kill, 2*sizeof(int));
CHECK_SI_SIZE (_kill, 2*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0xC);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+
CHECK_CSI_OFFSET(_timer);
CHECK_CSI_SIZE (_timer, 3*sizeof(int));
CHECK_SI_SIZE (_timer, 6*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_tid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_overrun) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14);
+
CHECK_CSI_OFFSET(_rt);
CHECK_CSI_SIZE (_rt, 3*sizeof(int));
CHECK_SI_SIZE (_rt, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14);
+
CHECK_CSI_OFFSET(_sigchld);
CHECK_CSI_SIZE (_sigchld, 5*sizeof(int));
CHECK_SI_SIZE (_sigchld, 8*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_utime) != 0x20);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x28);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_status) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_utime) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_stime) != 0x1C);
+
#ifdef CONFIG_X86_X32_ABI
CHECK_CSI_OFFSET(_sigchld_x32);
CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int));
/* no _sigchld_x32 in the generic siginfo_t */
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime) != 0x20);
#endif
CHECK_CSI_OFFSET(_sigfault);
CHECK_CSI_SIZE (_sigfault, 4*sizeof(int));
CHECK_SI_SIZE (_sigfault, 8*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_lower) != 0x20);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_upper) != 0x28);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_lower) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_upper) != 0x18);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14);
+
CHECK_CSI_OFFSET(_sigpoll);
CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int));
CHECK_SI_SIZE (_sigpoll, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_band) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_fd) != 0x10);
+
CHECK_CSI_OFFSET(_sigsys);
CHECK_CSI_SIZE (_sigsys, 3*sizeof(int));
CHECK_SI_SIZE (_sigsys, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_syscall) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_arch) != 0x1C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_call_addr) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_syscall) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_arch) != 0x14);
+
/* any new si_fields should be added here */
}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 1f9188f5357c..feb28fee6cea 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -5,7 +5,6 @@
#include <asm/unwind.h>
#include <asm/orc_types.h>
#include <asm/orc_lookup.h>
-#include <asm/sections.h>
#define orc_warn(fmt, ...) \
printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
@@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
}
/* vmlinux .init slow lookup: */
- if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
+ if (init_kernel_text(ip))
return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
__stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5edb27f1a2c4..9d0b5af7db91 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
return;
check_vip:
- if (VEFLAGS & X86_EFLAGS_VIP) {
+ if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) ==
+ (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) {
save_v86_state(regs, VM86_STI);
return;
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9b138a06c1a4..b854ebf5851b 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -118,9 +118,11 @@ SECTIONS
#ifdef CONFIG_X86_64
. = ALIGN(PAGE_SIZE);
+ VMLINUX_SYMBOL(__entry_trampoline_start) = .;
_entry_trampoline = .;
*(.entry_trampoline)
. = ALIGN(PAGE_SIZE);
+ VMLINUX_SYMBOL(__entry_trampoline_end) = .;
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a0c5a69bc7c4..b671fc2d0422 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
(1 << KVM_FEATURE_PV_UNHALT) |
- (1 << KVM_FEATURE_PV_TLB_FLUSH);
+ (1 << KVM_FEATURE_PV_TLB_FLUSH) |
+ (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 924ac8ce9d50..391dda8d43b7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2002,14 +2002,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
- struct kvm_lapic *apic;
+ struct kvm_lapic *apic = vcpu->arch.apic;
int i;
- apic_debug("%s\n", __func__);
+ if (!apic)
+ return;
- ASSERT(vcpu);
- apic = vcpu->arch.apic;
- ASSERT(apic != NULL);
+ apic_debug("%s\n", __func__);
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2165,7 +2164,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
- kvm_lapic_reset(vcpu, false);
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
return 0;
@@ -2569,7 +2567,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
pe = xchg(&apic->pending_events, 0);
if (test_bit(KVM_APIC_INIT, &pe)) {
- kvm_lapic_reset(vcpu, true);
kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 46ff304140c7..763bb3bade63 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2770,8 +2770,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
else
pte_access &= ~ACC_WRITE_MASK;
+ if (!kvm_is_mmio_pfn(pfn))
+ spte |= shadow_me_mask;
+
spte |= (u64)pfn << PAGE_SHIFT;
- spte |= shadow_me_mask;
if (pte_access & ACC_WRITE_MASK) {
@@ -3029,7 +3031,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
return RET_PF_RETRY;
}
- return -EFAULT;
+ return RET_PF_EMULATE;
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a74828..be9c839e2c89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,6 +49,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
+#include <asm/microcode.h>
#include <asm/nospec-branch.h>
#include <asm/virtext.h>
@@ -178,6 +179,8 @@ struct vcpu_svm {
uint64_t sysenter_eip;
uint64_t tsc_aux;
+ u64 msr_decfg;
+
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
@@ -300,6 +303,8 @@ module_param(vgif, int, 0444);
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);
+static u8 rsm_ins_bytes[] = "\x0f\xaa";
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -1383,6 +1388,7 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_SKINIT);
set_intercept(svm, INTERCEPT_WBINVD);
set_intercept(svm, INTERCEPT_XSETBV);
+ set_intercept(svm, INTERCEPT_RSM);
if (!kvm_mwait_in_guest()) {
set_intercept(svm, INTERCEPT_MONITOR);
@@ -1902,6 +1908,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 dummy;
u32 eax = 1;
+ vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
if (!init_event) {
@@ -3699,6 +3706,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
+static int rsm_interception(struct vcpu_svm *svm)
+{
+ return x86_emulate_instruction(&svm->vcpu, 0, 0,
+ rsm_ins_bytes, 2) == EMULATE_DONE;
+}
+
static int rdpmc_interception(struct vcpu_svm *svm)
{
int err;
@@ -3860,6 +3873,22 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
+static int svm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ msr->data = 0;
+
+ switch (msr->index) {
+ case MSR_F10H_DECFG:
+ if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
+ msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3935,9 +3964,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->spec_ctrl;
break;
- case MSR_IA32_UCODE_REV:
- msr_info->data = 0x01000065;
- break;
case MSR_F15H_IC_CFG: {
int family, model;
@@ -3955,6 +3981,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x1E;
}
break;
+ case MSR_F10H_DECFG:
+ msr_info->data = svm->msr_decfg;
+ break;
default:
return kvm_get_msr_common(vcpu, msr_info);
}
@@ -4133,6 +4162,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_F10H_DECFG: {
+ struct kvm_msr_entry msr_entry;
+
+ msr_entry.index = msr->index;
+ if (svm_get_msr_feature(&msr_entry))
+ return 1;
+
+ /* Check the supported bits */
+ if (data & ~msr_entry.data)
+ return 1;
+
+ /* Don't allow the guest to change a bit, #GP */
+ if (!msr->host_initiated && (data ^ msr_entry.data))
+ return 1;
+
+ svm->msr_decfg = data;
+ break;
+ }
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
@@ -4541,7 +4588,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_NPF] = npf_interception,
- [SVM_EXIT_RSM] = emulate_on_interception,
+ [SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
};
@@ -5355,7 +5402,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if (svm->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5464,11 +5511,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
- rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
if (svm->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
@@ -6236,16 +6283,18 @@ e_free:
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
+ void __user *measure = (void __user *)(uintptr_t)argp->data;
struct kvm_sev_info *sev = &kvm->arch.sev_info;
struct sev_data_launch_measure *data;
struct kvm_sev_launch_measure params;
+ void __user *p = NULL;
void *blob = NULL;
int ret;
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ if (copy_from_user(&params, measure, sizeof(params)))
return -EFAULT;
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -6256,17 +6305,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!params.len)
goto cmd;
- if (params.uaddr) {
+ p = (void __user *)(uintptr_t)params.uaddr;
+ if (p) {
if (params.len > SEV_FW_BLOB_MAX_SIZE) {
ret = -EINVAL;
goto e_free;
}
- if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
- ret = -EFAULT;
- goto e_free;
- }
-
ret = -ENOMEM;
blob = kmalloc(params.len, GFP_KERNEL);
if (!blob)
@@ -6290,13 +6335,13 @@ cmd:
goto e_free_blob;
if (blob) {
- if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+ if (copy_to_user(p, blob, params.len))
ret = -EFAULT;
}
done:
params.len = data->len;
- if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ if (copy_to_user(measure, &params, sizeof(params)))
ret = -EFAULT;
e_free_blob:
kfree(blob);
@@ -6597,7 +6642,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
struct page **pages;
void *blob, *hdr;
unsigned long n;
- int ret;
+ int ret, offset;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6623,6 +6668,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!data)
goto e_unpin_memory;
+ offset = params.guest_uaddr & (PAGE_SIZE - 1);
+ data->guest_address = __sme_page_pa(pages[0]) + offset;
+ data->guest_len = params.guest_len;
+
blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
if (IS_ERR(blob)) {
ret = PTR_ERR(blob);
@@ -6637,8 +6686,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
ret = PTR_ERR(hdr);
goto e_free_blob;
}
- data->trans_address = __psp_pa(blob);
- data->trans_len = params.trans_len;
+ data->hdr_address = __psp_pa(hdr);
+ data->hdr_len = params.hdr_len;
data->handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
@@ -6821,6 +6870,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_unblocking = svm_vcpu_unblocking,
.update_bp_intercept = update_bp_intercept,
+ .get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
.get_segment_base = svm_get_segment_base,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3dec126aa302..051dab74e4e9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
+#include <asm/microcode.h>
#include <asm/nospec-branch.h>
#include "trace.h"
@@ -3226,6 +3227,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
return !(val & ~valid_bits);
}
+static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ return 1;
+}
+
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
@@ -4485,7 +4491,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
hw_cr4 &= ~X86_CR4_UMIP;
- } else
+ } else if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
@@ -5765,6 +5772,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->rmode.vm86_active = 0;
vmx->spec_ctrl = 0;
+ vcpu->arch.microcode_version = 0x100000000ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -9452,7 +9460,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if (vmx->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
@@ -9587,11 +9595,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
- rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
if (vmx->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
@@ -11199,7 +11207,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (ret)
return ret;
- if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
+ /*
+ * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
+ * by event injection, halt vcpu.
+ */
+ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
+ !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
return kvm_vcpu_halt(vcpu);
vmx->nested.nested_run_pending = 1;
@@ -12290,6 +12303,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.vcpu_put = vmx_vcpu_put,
.update_bp_intercept = update_exception_bitmap,
+ .get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8a0b545ac20..18b5ca7a3197 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1049,6 +1049,45 @@ static u32 emulated_msrs[] = {
static unsigned num_emulated_msrs;
+/*
+ * List of msr numbers which are used to expose MSR-based features that
+ * can be used by a hypervisor to validate requested CPU features.
+ */
+static u32 msr_based_features[] = {
+ MSR_F10H_DECFG,
+ MSR_IA32_UCODE_REV,
+};
+
+static unsigned int num_msr_based_features;
+
+static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ switch (msr->index) {
+ case MSR_IA32_UCODE_REV:
+ rdmsrl(msr->index, msr->data);
+ break;
+ default:
+ if (kvm_x86_ops->get_msr_feature(msr))
+ return 1;
+ }
+ return 0;
+}
+
+static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+ struct kvm_msr_entry msr;
+ int r;
+
+ msr.index = index;
+ r = kvm_get_msr_feature(&msr);
+ if (r)
+ return r;
+
+ *data = msr.data;
+
+ return 0;
+}
+
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & efer_reserved_bits)
@@ -2222,7 +2261,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_AMD64_NB_CFG:
- case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
case MSR_VM_HSAVE_PA:
case MSR_AMD64_PATCH_LOADER:
@@ -2230,6 +2268,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_DC_CFG:
break;
+ case MSR_IA32_UCODE_REV:
+ if (msr_info->host_initiated)
+ vcpu->arch.microcode_version = data;
+ break;
case MSR_EFER:
return set_efer(vcpu, data);
case MSR_K7_HWCR:
@@ -2525,7 +2567,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0;
break;
case MSR_IA32_UCODE_REV:
- msr_info->data = 0x100000000ULL;
+ msr_info->data = vcpu->arch.microcode_version;
break;
case MSR_MTRRcap:
case 0x200 ... 0x2ff:
@@ -2680,13 +2722,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data))
{
- int i, idx;
+ int i;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
for (i = 0; i < msrs->nmsrs; ++i)
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
return i;
}
@@ -2785,6 +2825,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_GET_MSR_FEATURES:
r = 1;
break;
case KVM_CAP_ADJUST_CLOCK:
@@ -2899,6 +2940,31 @@ long kvm_arch_dev_ioctl(struct file *filp,
goto out;
r = 0;
break;
+ case KVM_GET_MSR_FEATURE_INDEX_LIST: {
+ struct kvm_msr_list __user *user_msr_list = argp;
+ struct kvm_msr_list msr_list;
+ unsigned int n;
+
+ r = -EFAULT;
+ if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
+ goto out;
+ n = msr_list.nmsrs;
+ msr_list.nmsrs = num_msr_based_features;
+ if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
+ goto out;
+ r = -E2BIG;
+ if (n < msr_list.nmsrs)
+ goto out;
+ r = -EFAULT;
+ if (copy_to_user(user_msr_list->indices, &msr_based_features,
+ num_msr_based_features * sizeof(u32)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_GET_MSRS:
+ r = msr_io(NULL, argp, do_get_msr_feature, 1);
+ break;
}
default:
r = -EINVAL;
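
KVM_GET_MSR_FEATURE_INDEX_LIST and the system-scope KVM_GET_MSRS above are issued on the /dev/kvm fd (not on a vCPU fd) and are served by do_get_msr_feature(). A hedged userspace sketch that reads the host-reported MSR_IA32_UCODE_REV value through the new interface (assumes a kernel advertising KVM_CAP_GET_MSR_FEATURES):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		struct {
			struct kvm_msrs hdr;
			struct kvm_msr_entry entry;
		} req;

		if (kvm < 0)
			return 1;

		memset(&req, 0, sizeof(req));
		req.hdr.nmsrs = 1;
		req.entry.index = 0x8b;		/* MSR_IA32_UCODE_REV */

		/* On the system fd, KVM_GET_MSRS returns MSR-based feature values. */
		if (ioctl(kvm, KVM_GET_MSRS, &req) != 1) {
			perror("KVM_GET_MSRS");
			return 1;
		}
		printf("host-reported microcode revision: 0x%llx\n",
		       (unsigned long long)req.entry.data);
		return 0;
	}
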
@@ -3636,12 +3702,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_GET_MSRS:
+ case KVM_GET_MSRS: {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = msr_io(vcpu, argp, do_get_msr, 1);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
- case KVM_SET_MSRS:
+ }
+ case KVM_SET_MSRS: {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = msr_io(vcpu, argp, do_set_msr, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
+ }
case KVM_TPR_ACCESS_REPORTING: {
struct kvm_tpr_access_ctl tac;
@@ -4464,6 +4536,19 @@ static void kvm_init_msr_list(void)
j++;
}
num_emulated_msrs = j;
+
+ for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+ struct kvm_msr_entry msr;
+
+ msr.index = msr_based_features[i];
+ if (kvm_get_msr_feature(&msr))
+ continue;
+
+ if (j < i)
+ msr_based_features[j] = msr_based_features[i];
+ j++;
+ }
+ num_msr_based_features = j;
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -8017,6 +8102,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
+ kvm_lapic_reset(vcpu, init_event);
+
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
@@ -8460,10 +8547,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
return r;
}
- if (!size) {
- r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
- WARN_ON(r < 0);
- }
+ if (!size)
+ vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
return 0;
}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 91e9700cc6dc..25a972c61b0a 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3a5e03..c909961e678a 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
GENERATE_THUNK(r14)
GENERATE_THUNK(r15)
#endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
- mov $(\nr / 2), %_ASM_BX
- .align 16
-771:
- call 772f
-773: /* speculation trap */
- pause
- lfence
- jmp 773b
- .align 16
-772:
- call 774f
-775: /* speculation trap */
- pause
- lfence
- jmp 775b
- .align 16
-774:
- dec %_ASM_BX
- jnz 771b
- add $((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
-
-ENTRY(__fill_rsb)
- STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
- ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
- STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
- ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index b9283cc27622..476d810639a8 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void)
for_each_possible_cpu(cpu)
setup_cpu_entry_area(cpu);
+
+ /*
+ * This is the last essential update to swapper_pgdir which needs
+ * to be synchronized to initial_page_table on 32bit.
+ */
+ sync_initial_page_table();
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 800de815519c..25a30b5d6582 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (!pmd_k)
return -1;
- if (pmd_huge(*pmd_k))
+ if (pmd_large(*pmd_k))
return 0;
pte_k = pte_offset_kernel(pmd_k, address);
@@ -475,7 +475,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
BUG();
- if (pud_huge(*pud))
+ if (pud_large(*pud))
return 0;
pmd = pmd_offset(pud, address);
@@ -486,7 +486,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
BUG();
- if (pmd_huge(*pmd))
+ if (pmd_large(*pmd))
return 0;
pte_ref = pte_offset_kernel(pmd_ref, address);
@@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
tsk = current;
mm = tsk->mm;
- /*
- * Detect and handle instructions that would cause a page fault for
- * both a tracked kernel page and a userspace page.
- */
prefetchw(&mm->mmap_sem);
if (unlikely(kmmio_fault(regs, address)))
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 79cb066f40c0..396e1f0151ac 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
}
#endif /* CONFIG_HIGHMEM */
+void __init sync_initial_page_table(void)
+{
+ clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+
+ /*
+ * sync back low identity map too. It is used for example
+ * in the 32-bit EFI stub.
+ */
+ clone_pgd_range(initial_page_table,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+}
+
void __init native_pagetable_init(void)
{
unsigned long pfn, va;
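For context (not part of this patch), clone_pgd_range() from asm/pgtable.h is roughly a memcpy of top-level page-table entries, so the new sync_initial_page_table() simply copies the kernel-space and low identity-map PGD slots from swapper_pg_dir back into initial_page_table, making late additions such as the cpu_entry_area mappings visible to code still running on the early page table:

	/* Roughly what asm/pgtable.h already provides; shown only for reference. */
	static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
	{
		memcpy(dst, src, count * sizeof(pgd_t));
	}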
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 01f682cf77a8..40a6085063d6 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -15,6 +15,7 @@
#include <asm/page.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
+#include <asm/nospec-branch.h>
.text
.code64
@@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
movq %rax, %r8 /* Workarea encryption routine */
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
+ ANNOTATE_RETPOLINE_SAFE
call *%rax /* Call the encryption routine */
pop %r12
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index ce38f165489b..631507f0c198 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void)
}
/*
- * Clone the ESPFIX P4D into the user space visinble page table
+ * Clone the ESPFIX P4D into the user space visible page table
*/
static void __init pti_setup_espfix64(void)
{
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 2c67bae6bb53..fb1df9488e98 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -79,7 +79,7 @@ static void intel_mid_power_off(void)
static void intel_mid_reboot(void)
{
- intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+ intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
}
static unsigned long __init intel_mid_calibrate_tsc(void)
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index de53bd15df5a..24bb7598774e 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -102,7 +102,7 @@ ENTRY(startup_32)
* don't we'll eventually crash trying to execute encrypted
* instructions.
*/
- bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
+ btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
jnc .Ldone
movl $MSR_K8_SYSCFG, %ecx
rdmsr
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c047f42552e1..3c2c2530737e 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
if (!xen_initial_domain()) {
add_preferred_console("xenboot", 0, NULL);
- add_preferred_console("tty", 0, NULL);
- add_preferred_console("hvc", 0, NULL);
if (pci_xen)
x86_init.pci.arch_init = pci_xen_init;
} else {
@@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_boot_params_init_edd();
}
+
+ add_preferred_console("tty", 0, NULL);
+ add_preferred_console("hvc", 0, NULL);
+
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
pci_probe &= ~PCI_PROBE_BIOS;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d9f96cc5d743..1d83152c761b 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,12 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/tick.h>
+#include <linux/percpu-defs.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/grant_table.h>
#include <xen/events.h>
+#include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
#include <asm/fixmap.h>
@@ -15,6 +18,8 @@
#include "mmu.h"
#include "pmu.h"
+static DEFINE_PER_CPU(u64, spec_ctrl);
+
void xen_arch_pre_suspend(void)
{
xen_save_time_memory_area();
@@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)
static void xen_vcpu_notify_restore(void *data)
{
+ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+ wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
+
/* Boot processor notified via generic timekeeping_resume() */
if (smp_processor_id() == 0)
return;
@@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data)
static void xen_vcpu_notify_suspend(void *data)
{
+ u64 tmp;
+
tick_suspend_local();
+
+ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+ rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
+ this_cpu_write(spec_ctrl, tmp);
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ }
}
void xen_arch_resume(void)