Diffstat (limited to 'arch/x86/entry/entry_32.S')
-rw-r--r--  arch/x86/entry/entry_32.S  422
1 file changed, 262 insertions(+), 160 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index f83ca5aa8b77..7e0560442538 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -172,7 +172,7 @@ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 	.if \no_user_check == 0
 	/* coming from usermode? */
-	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
+	testl	$USER_SEGMENT_RPL_MASK, PT_CS(%esp)
 	jz	.Lend_\@
 	.endif
 	/* On user-cr3? */
@@ -205,64 +205,76 @@
 #define CS_FROM_ENTRY_STACK	(1 << 31)
 #define CS_FROM_USER_CR3	(1 << 30)
 #define CS_FROM_KERNEL		(1 << 29)
+#define CS_FROM_ESPFIX		(1 << 28)
 
 .macro FIXUP_FRAME
 	/*
 	 * The high bits of the CS dword (__csh) are used for CS_FROM_*.
 	 * Clear them in case hardware didn't do this for us.
 	 */
-	andl	$0x0000ffff, 3*4(%esp)
+	andl	$0x0000ffff, 4*4(%esp)
 
 #ifdef CONFIG_VM86
-	testl	$X86_EFLAGS_VM, 4*4(%esp)
+	testl	$X86_EFLAGS_VM, 5*4(%esp)
 	jnz	.Lfrom_usermode_no_fixup_\@
 #endif
-	testl	$SEGMENT_RPL_MASK, 3*4(%esp)
+	testl	$USER_SEGMENT_RPL_MASK, 4*4(%esp)
 	jnz	.Lfrom_usermode_no_fixup_\@
 
-	orl	$CS_FROM_KERNEL, 3*4(%esp)
+	orl	$CS_FROM_KERNEL, 4*4(%esp)
 
 	/*
	 * When we're here from kernel mode, the (exception) stack looks like:
	 *
-	 *  5*4(%esp) - <previous context>
-	 *  4*4(%esp) - flags
-	 *  3*4(%esp) - cs
-	 *  2*4(%esp) - ip
-	 *  1*4(%esp) - orig_eax
-	 *  0*4(%esp) - gs / function
+	 *  6*4(%esp) - <previous context>
+	 *  5*4(%esp) - flags
+	 *  4*4(%esp) - cs
+	 *  3*4(%esp) - ip
+	 *  2*4(%esp) - orig_eax
+	 *  1*4(%esp) - gs / function
+	 *  0*4(%esp) - fs
	 *
	 * Let's build a 5-entry IRET frame after that, such that struct pt_regs
	 * is complete and in particular regs->sp is correct. This gives us
-	 * the original 5 entries as gap:
+	 * the original 6 entries as gap:
	 *
-	 * 12*4(%esp) - <previous context>
-	 * 11*4(%esp) - gap / flags
-	 * 10*4(%esp) - gap / cs
-	 *  9*4(%esp) - gap / ip
-	 *  8*4(%esp) - gap / orig_eax
-	 *  7*4(%esp) - gap / gs / function
-	 *  6*4(%esp) - ss
-	 *  5*4(%esp) - sp
-	 *  4*4(%esp) - flags
-	 *  3*4(%esp) - cs
-	 *  2*4(%esp) - ip
-	 *  1*4(%esp) - orig_eax
-	 *  0*4(%esp) - gs / function
+	 * 14*4(%esp) - <previous context>
+	 * 13*4(%esp) - gap / flags
+	 * 12*4(%esp) - gap / cs
+	 * 11*4(%esp) - gap / ip
+	 * 10*4(%esp) - gap / orig_eax
+	 *  9*4(%esp) - gap / gs / function
+	 *  8*4(%esp) - gap / fs
+	 *  7*4(%esp) - ss
+	 *  6*4(%esp) - sp
+	 *  5*4(%esp) - flags
+	 *  4*4(%esp) - cs
+	 *  3*4(%esp) - ip
+	 *  2*4(%esp) - orig_eax
+	 *  1*4(%esp) - gs / function
+	 *  0*4(%esp) - fs
	 */
 
 	pushl	%ss		# ss
 	pushl	%esp		# sp (points at ss)
-	addl	$6*4, (%esp)	# point sp back at the previous context
-	pushl	6*4(%esp)	# flags
-	pushl	6*4(%esp)	# cs
-	pushl	6*4(%esp)	# ip
-	pushl	6*4(%esp)	# orig_eax
-	pushl	6*4(%esp)	# gs / function
+	addl	$7*4, (%esp)	# point sp back at the previous context
+	pushl	7*4(%esp)	# flags
+	pushl	7*4(%esp)	# cs
+	pushl	7*4(%esp)	# ip
+	pushl	7*4(%esp)	# orig_eax
+	pushl	7*4(%esp)	# gs / function
+	pushl	7*4(%esp)	# fs
 .Lfrom_usermode_no_fixup_\@:
 .endm
 
 .macro IRET_FRAME
+	/*
+	 * We're called with %ds, %es, %fs, and %gs from the interrupted
+	 * frame, so we shouldn't use them.  Also, we may be in ESPFIX
+	 * mode and therefore have a nonzero SS base and an offset ESP,
+	 * so any attempt to access the stack needs to use SS.  (except for
+	 * accesses through %esp, which automatically use SS.)
+	 */
 	testl	$CS_FROM_KERNEL, 1*4(%esp)
 	jz	.Lfinished_frame_\@
 
@@ -276,31 +288,40 @@
 	movl	5*4(%esp), %eax		# (modified) regs->sp
 
 	movl	4*4(%esp), %ecx		# flags
-	movl	%ecx, -4(%eax)
+	movl	%ecx, %ss:-1*4(%eax)
 
 	movl	3*4(%esp), %ecx		# cs
 	andl	$0x0000ffff, %ecx
-	movl	%ecx, -8(%eax)
+	movl	%ecx, %ss:-2*4(%eax)
 
 	movl	2*4(%esp), %ecx		# ip
-	movl	%ecx, -12(%eax)
+	movl	%ecx, %ss:-3*4(%eax)
 
 	movl	1*4(%esp), %ecx		# eax
-	movl	%ecx, -16(%eax)
+	movl	%ecx, %ss:-4*4(%eax)
 
 	popl	%ecx
-	lea	-16(%eax), %esp
+	lea	-4*4(%eax), %esp
 	popl	%eax
 .Lfinished_frame_\@:
 .endm
 
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
 	cld
 .if \skip_gs == 0
 	PUSH_GS
 .endif
-	FIXUP_FRAME
 	pushl	%fs
+
+	pushl	%eax
+	movl	$(__KERNEL_PERCPU), %eax
+	movl	%eax, %fs
+.if \unwind_espfix > 0
+	UNWIND_ESPFIX_STACK
+.endif
+	popl	%eax
+
+	FIXUP_FRAME
 	pushl	%es
 	pushl	%ds
 	pushl	\pt_regs_ax
@@ -313,8 +334,6 @@
 	movl	$(__USER_DS), %edx
 	movl	%edx, %ds
 	movl	%edx, %es
-	movl	$(__KERNEL_PERCPU), %edx
-	movl	%edx, %fs
 .if \skip_gs == 0
 	SET_KERNEL_GS %edx
 .endif
@@ -324,8 +343,8 @@
 .endif
 .endm
 
-.macro SAVE_ALL_NMI cr3_reg:req
-	SAVE_ALL
+.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0
+	SAVE_ALL unwind_espfix=\unwind_espfix
 
 	BUG_IF_WRONG_CR3
 
@@ -357,6 +376,7 @@
 2:	popl	%es
 3:	popl	%fs
 	POP_GS \pop
+	IRET_FRAME
 
 .pushsection .fixup, "ax"
 4:	movl	$0, (%esp)
 	jmp	1b
@@ -395,7 +415,8 @@
 
 .macro CHECK_AND_APPLY_ESPFIX
 #ifdef CONFIG_X86_ESPFIX32
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
 
 	ALTERNATIVE	"jmp .Lend_\@", "", X86_BUG_ESPFIX
 
@@ -709,7 +730,7 @@
  * %eax: prev task
  * %edx: next task
  */
-ENTRY(__switch_to_asm)
+SYM_CODE_START(__switch_to_asm)
 	/*
	 * Save callee-saved registers
	 * This must match the order in struct inactive_task_frame
@@ -718,6 +739,11 @@ ENTRY(__switch_to_asm)
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
+	/*
+	 * Flags are saved to prevent AC leakage. This could go
+	 * away if objtool had 32-bit support to verify
+	 * the STAC/CLAC correctness.
+	 */
 	pushfl
 
 	/* switch stack */
@@ -740,15 +766,16 @@ ENTRY(__switch_to_asm)
 	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
-	/* restore callee-saved registers */
+	/* Restore flags of the incoming task to restore AC state. */
 	popfl
+	/* restore callee-saved registers */
 	popl	%esi
 	popl	%edi
 	popl	%ebx
 	popl	%ebp
 
 	jmp	__switch_to
-END(__switch_to_asm)
+SYM_CODE_END(__switch_to_asm)
 
 /*
  * The unwinder expects the last frame on the stack to always be at the same
@@ -757,7 +784,7 @@ END(__switch_to_asm)
  * asmlinkage function so its argument has to be pushed on the stack.  This
  * wrapper creates a proper "end of stack" frame header before the call.
  */
-ENTRY(schedule_tail_wrapper)
+SYM_FUNC_START(schedule_tail_wrapper)
 	FRAME_BEGIN
 
 	pushl	%eax
@@ -766,7 +793,7 @@ ENTRY(schedule_tail_wrapper)
 
 	FRAME_END
 	ret
-ENDPROC(schedule_tail_wrapper)
+SYM_FUNC_END(schedule_tail_wrapper)
 
 /*
  * A newly forked process directly context switches into this address.
  *
@@ -774,7 +801,7 @@ ENDPROC(schedule_tail_wrapper)
  * ebx: kernel thread func (NULL for user thread)
  * edi: kernel thread arg
  */
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
 	call	schedule_tail_wrapper
 
 	testl	%ebx, %ebx
@@ -797,7 +824,7 @@ ENTRY(ret_from_fork)
	 */
 	movl	$0, PT_EAX(%esp)
 	jmp	2b
-END(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
 
 /*
  * Return to user mode is not as complex as all this looks,
@@ -807,8 +834,7 @@ END(ret_from_fork)
  */
 
 	# userspace resumption stub bypassing syscall exit tracing
-	ALIGN
-ret_from_exception:
+SYM_CODE_START_LOCAL(ret_from_exception)
 	preempt_stop(CLBR_ANY)
 ret_from_intr:
 #ifdef CONFIG_VM86
@@ -825,15 +851,14 @@ ret_from_intr:
 	cmpl	$USER_RPL, %eax
 	jb	restore_all_kernel		# not returning to v8086 or userspace
 
-ENTRY(resume_userspace)
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl	%esp, %eax
 	call	prepare_exit_to_usermode
 	jmp	restore_all
-END(ret_from_exception)
+SYM_CODE_END(ret_from_exception)
 
-GLOBAL(__begin_SYSENTER_singlestep_region)
+SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
 /*
  * All code from here through __end_SYSENTER_singlestep_region is subject
  * to being single-stepped if a user program sets TF and executes SYSENTER.
@@ -848,9 +873,10 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
  * Xen doesn't set %esp to be precisely what the normal SYSENTER
  * entry point expects, so fix it up before using the normal path.
  */
-ENTRY(xen_sysenter_target)
+SYM_CODE_START(xen_sysenter_target)
 	addl	$5*4, %esp			/* remove xen-provided frame */
 	jmp	.Lsysenter_past_esp
+SYM_CODE_END(xen_sysenter_target)
 #endif
 
 /*
@@ -885,7 +911,7 @@ ENTRY(xen_sysenter_target)
  * ebp  user stack
  * 0(%ebp) arg6
  */
-ENTRY(entry_SYSENTER_32)
+SYM_FUNC_START(entry_SYSENTER_32)
 	/*
	 * On entry-stack with all userspace-regs live - save and
	 * restore eflags and %eax to use it as scratch-reg for the cr3
@@ -1012,8 +1038,8 @@ ENTRY(entry_SYSENTER_32)
 	pushl	$X86_EFLAGS_FIXED
 	popfl
 	jmp	.Lsysenter_flags_fixed
-GLOBAL(__end_SYSENTER_singlestep_region)
-ENDPROC(entry_SYSENTER_32)
+SYM_ENTRY(__end_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
+SYM_FUNC_END(entry_SYSENTER_32)
 
 /*
  * 32-bit legacy system call entry.
@@ -1043,7 +1069,7 @@ ENDPROC(entry_SYSENTER_32)
  * edi  arg5
  * ebp  arg6
  */
-ENTRY(entry_INT80_32)
+SYM_FUNC_START(entry_INT80_32)
 	ASM_CLAC
 	pushl	%eax			/* pt_regs->orig_ax */
 
@@ -1064,7 +1090,6 @@ ENTRY(entry_INT80_32)
 restore_all:
 	TRACE_IRQS_IRET
 	SWITCH_TO_ENTRY_STACK
-.Lrestore_all_notrace:
 	CHECK_AND_APPLY_ESPFIX
 .Lrestore_nocheck:
 	/* Switch back to user CR3 */
@@ -1075,7 +1100,6 @@ restore_all:
 	/* Restore user state */
 	RESTORE_REGS pop=4			# skip orig_eax/error_code
 .Lirq_return:
-	IRET_FRAME
 	/*
	 * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
	 * when returning from IPI handler and when returning from
@@ -1100,7 +1124,7 @@ restore_all:
 	jmp	.Lirq_return
 
 .section .fixup, "ax"
-ENTRY(iret_exc )
+SYM_CODE_START(iret_exc)
 	pushl	$0				# no error code
 	pushl	$do_iret_error
 
@@ -1117,9 +1141,10 @@ ENTRY(iret_exc )
 #endif
 
 	jmp	common_exception
+SYM_CODE_END(iret_exc)
 .previous
 _ASM_EXTABLE(.Lirq_return, iret_exc)
-ENDPROC(entry_INT80_32)
+SYM_FUNC_END(entry_INT80_32)
 
 .macro FIXUP_ESPFIX_STACK
 /*
@@ -1128,30 +1153,43 @@ ENDPROC(entry_INT80_32)
  * We can't call C functions using the ESPFIX stack. This code reads
  * the high word of the segment base from the GDT and switches to the
  * normal stack and adjusts ESP with the matching offset.
+ *
+ * We might be on user CR3 here, so percpu data is not mapped and we can't
+ * access the GDT through the percpu segment.  Instead, use SGDT to find
+ * the cpu_entry_area alias of the GDT.
  */
 #ifdef CONFIG_X86_ESPFIX32
 	/* fixup the stack */
-	mov	GDT_ESPFIX_SS + 4, %al		/* bits 16..23 */
-	mov	GDT_ESPFIX_SS + 7, %ah		/* bits 24..31 */
+	pushl	%ecx
+	subl	$2*4, %esp
+	sgdt	(%esp)
+	movl	2(%esp), %ecx			/* GDT address */
+	/*
+	 * Careful: ECX is a linear pointer, so we need to force base
+	 * zero.  %cs is the only known-linear segment we have right now.
+	 */
+	mov	%cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al	/* bits 16..23 */
+	mov	%cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah	/* bits 24..31 */
 	shl	$16, %eax
+	addl	$2*4, %esp
+	popl	%ecx
 	addl	%esp, %eax			/* the adjusted stack pointer */
 	pushl	$__KERNEL_DS
 	pushl	%eax
 	lss	(%esp), %esp			/* switch to the normal stack segment */
 #endif
 .endm
+
 .macro UNWIND_ESPFIX_STACK
+	/* It's safe to clobber %eax, all other regs need to be preserved */
 #ifdef CONFIG_X86_ESPFIX32
 	movl	%ss, %eax
 	/* see if on espfix stack */
 	cmpw	$__ESPFIX_SS, %ax
-	jne	27f
-	movl	$__KERNEL_DS, %eax
-	movl	%eax, %ds
-	movl	%eax, %es
+	jne	.Lno_fixup_\@
 	/* switch to normal stack */
 	FIXUP_ESPFIX_STACK
-27:
+.Lno_fixup_\@:
 #endif
 .endm
 
@@ -1160,7 +1198,7 @@
  * We pack 1 stub into every 8-byte block.
  */
 	.align 8
-ENTRY(irq_entries_start)
+SYM_CODE_START(irq_entries_start)
     vector=FIRST_EXTERNAL_VECTOR
     .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
 	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
@@ -1168,11 +1206,11 @@
 	jmp	common_interrupt
 	.align	8
     .endr
-END(irq_entries_start)
+SYM_CODE_END(irq_entries_start)
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	.align	8
-ENTRY(spurious_entries_start)
+SYM_CODE_START(spurious_entries_start)
     vector=FIRST_SYSTEM_VECTOR
     .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
 	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
@@ -1180,9 +1218,9 @@
 	jmp	common_spurious
 	.align	8
     .endr
-END(spurious_entries_start)
+SYM_CODE_END(spurious_entries_start)
 
-common_spurious:
+SYM_CODE_START_LOCAL(common_spurious)
 	ASM_CLAC
 	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
 	SAVE_ALL switch_stacks=1
@@ -1191,7 +1229,7 @@ common_spurious:
 	movl	%esp, %eax
 	call	smp_spurious_interrupt
 	jmp	ret_from_intr
-ENDPROC(common_spurious)
+SYM_CODE_END(common_spurious)
 #endif
 
 /*
@@ -1199,7 +1237,7 @@
  * so IRQ-flags tracing has to follow that:
  */
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
+SYM_CODE_START_LOCAL(common_interrupt)
 	ASM_CLAC
 	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
 
@@ -1209,10 +1247,10 @@ common_interrupt:
 	movl	%esp, %eax
 	call	do_IRQ
 	jmp	ret_from_intr
-ENDPROC(common_interrupt)
+SYM_CODE_END(common_interrupt)
 
 #define BUILD_INTERRUPT3(name, nr, fn)		\
-ENTRY(name)					\
+SYM_FUNC_START(name)				\
 	ASM_CLAC;				\
 	pushl	$~(nr);				\
 	SAVE_ALL switch_stacks=1;		\
@@ -1221,7 +1259,7 @@ ENTRY(name)					\
 	movl	%esp, %eax;			\
 	call	fn;				\
 	jmp	ret_from_intr;			\
-ENDPROC(name)
+SYM_FUNC_END(name)
 
 #define BUILD_INTERRUPT(name, nr)		\
 	BUILD_INTERRUPT3(name, nr, smp_##name);	\
 
@@ -1229,14 +1267,14 @@ ENDPROC(name)
 
 /* The include is where all of the SMP etc. interrupts come from */
 #include <asm/entry_arch.h>
 
-ENTRY(coprocessor_error)
+SYM_CODE_START(coprocessor_error)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_coprocessor_error
 	jmp	common_exception
-END(coprocessor_error)
+SYM_CODE_END(coprocessor_error)
 
-ENTRY(simd_coprocessor_error)
+SYM_CODE_START(simd_coprocessor_error)
 	ASM_CLAC
 	pushl	$0
 #ifdef CONFIG_X86_INVD_BUG
@@ -1248,104 +1286,99 @@ ENTRY(simd_coprocessor_error)
 	pushl	$do_simd_coprocessor_error
 #endif
 	jmp	common_exception
-END(simd_coprocessor_error)
+SYM_CODE_END(simd_coprocessor_error)
 
-ENTRY(device_not_available)
+SYM_CODE_START(device_not_available)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	pushl	$do_device_not_available
 	jmp	common_exception
-END(device_not_available)
+SYM_CODE_END(device_not_available)
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
+SYM_CODE_START(native_iret)
 	iret
 	_ASM_EXTABLE(native_iret, iret_exc)
-END(native_iret)
+SYM_CODE_END(native_iret)
 #endif
 
-ENTRY(overflow)
+SYM_CODE_START(overflow)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_overflow
 	jmp	common_exception
-END(overflow)
+SYM_CODE_END(overflow)
 
-ENTRY(bounds)
+SYM_CODE_START(bounds)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_bounds
 	jmp	common_exception
-END(bounds)
+SYM_CODE_END(bounds)
 
-ENTRY(invalid_op)
+SYM_CODE_START(invalid_op)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_invalid_op
 	jmp	common_exception
-END(invalid_op)
+SYM_CODE_END(invalid_op)
 
-ENTRY(coprocessor_segment_overrun)
+SYM_CODE_START(coprocessor_segment_overrun)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_coprocessor_segment_overrun
 	jmp	common_exception
-END(coprocessor_segment_overrun)
+SYM_CODE_END(coprocessor_segment_overrun)
 
-ENTRY(invalid_TSS)
+SYM_CODE_START(invalid_TSS)
 	ASM_CLAC
 	pushl	$do_invalid_TSS
 	jmp	common_exception
-END(invalid_TSS)
+SYM_CODE_END(invalid_TSS)
 
-ENTRY(segment_not_present)
+SYM_CODE_START(segment_not_present)
 	ASM_CLAC
 	pushl	$do_segment_not_present
 	jmp	common_exception
-END(segment_not_present)
+SYM_CODE_END(segment_not_present)
 
-ENTRY(stack_segment)
+SYM_CODE_START(stack_segment)
 	ASM_CLAC
 	pushl	$do_stack_segment
 	jmp	common_exception
-END(stack_segment)
+SYM_CODE_END(stack_segment)
 
-ENTRY(alignment_check)
+SYM_CODE_START(alignment_check)
 	ASM_CLAC
 	pushl	$do_alignment_check
 	jmp	common_exception
-END(alignment_check)
+SYM_CODE_END(alignment_check)
 
-ENTRY(divide_error)
+SYM_CODE_START(divide_error)
 	ASM_CLAC
 	pushl	$0				# no error code
 	pushl	$do_divide_error
 	jmp	common_exception
-END(divide_error)
+SYM_CODE_END(divide_error)
 
 #ifdef CONFIG_X86_MCE
-ENTRY(machine_check)
+SYM_CODE_START(machine_check)
 	ASM_CLAC
 	pushl	$0
 	pushl	machine_check_vector
 	jmp	common_exception
-END(machine_check)
+SYM_CODE_END(machine_check)
 #endif
 
-ENTRY(spurious_interrupt_bug)
+SYM_CODE_START(spurious_interrupt_bug)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_spurious_interrupt_bug
 	jmp	common_exception
-END(spurious_interrupt_bug)
+SYM_CODE_END(spurious_interrupt_bug)
 
 #ifdef CONFIG_XEN_PV
-ENTRY(xen_hypervisor_callback)
-	pushl	$-1				/* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	ENCODE_FRAME_POINTER
-	TRACE_IRQS_OFF
-
+SYM_FUNC_START(xen_hypervisor_callback)
 	/*
	 * Check to see if we got the event in the critical
	 * region in xen_iret_direct, after we've reenabled
@@ -1353,22 +1386,23 @@ ENTRY(xen_hypervisor_callback)
	 * iret instruction's behaviour where it delivers a
	 * pending interrupt when enabling interrupts:
	 */
-	movl	PT_EIP(%esp), %eax
-	cmpl	$xen_iret_start_crit, %eax
+	cmpl	$xen_iret_start_crit, (%esp)
 	jb	1f
-	cmpl	$xen_iret_end_crit, %eax
+	cmpl	$xen_iret_end_crit, (%esp)
 	jae	1f
-
-	jmp	xen_iret_crit_fixup
-
-ENTRY(xen_do_upcall)
-1:	mov	%esp, %eax
+	call	xen_iret_crit_fixup
+1:
+	pushl	$-1				/* orig_ax = -1 => not a system call */
+	SAVE_ALL
+	ENCODE_FRAME_POINTER
+	TRACE_IRQS_OFF
+	mov	%esp, %eax
 	call	xen_evtchn_do_upcall
 #ifndef CONFIG_PREEMPTION
 	call	xen_maybe_preempt_hcall
 #endif
 	jmp	ret_from_intr
-ENDPROC(xen_hypervisor_callback)
+SYM_FUNC_END(xen_hypervisor_callback)
 
 /*
  * Hypervisor uses this for application faults while it executes.
@@ -1382,7 +1416,7 @@ ENDPROC(xen_hypervisor_callback)
  * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
  * We distinguish between categories by maintaining a status value in EAX.
  */
-ENTRY(xen_failsafe_callback)
+SYM_FUNC_START(xen_failsafe_callback)
 	pushl	%eax
 	movl	$1, %eax
 1:	mov	4(%esp), %ds
@@ -1419,7 +1453,7 @@ ENTRY(xen_failsafe_callback)
 	_ASM_EXTABLE(2b, 7b)
 	_ASM_EXTABLE(3b, 8b)
 	_ASM_EXTABLE(4b, 9b)
-ENDPROC(xen_failsafe_callback)
+SYM_FUNC_END(xen_failsafe_callback)
 #endif /* CONFIG_XEN_PV */
 
 #ifdef CONFIG_XEN_PVHVM
@@ -1441,18 +1475,17 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
 #endif /* CONFIG_HYPERV */
 
-ENTRY(page_fault)
+SYM_CODE_START(page_fault)
 	ASM_CLAC
 	pushl	$do_page_fault
 	jmp	common_exception_read_cr2
-END(page_fault)
+SYM_CODE_END(page_fault)
 
-common_exception_read_cr2:
+SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
 	/* the function address is in %gs's slot on the stack */
-	SAVE_ALL switch_stacks=1 skip_gs=1
+	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
 
 	ENCODE_FRAME_POINTER
-	UNWIND_ESPFIX_STACK
 
 	/* fixup %gs */
 	GS_TO_REG %ecx
@@ -1470,13 +1503,12 @@ common_exception_read_cr2:
 	movl	%esp, %eax			# pt_regs pointer
 	CALL_NOSPEC %edi
 	jmp	ret_from_exception
-END(common_exception_read_cr2)
+SYM_CODE_END(common_exception_read_cr2)
 
-common_exception:
+SYM_CODE_START_LOCAL_NOALIGN(common_exception)
 	/* the function address is in %gs's slot on the stack */
-	SAVE_ALL switch_stacks=1 skip_gs=1
+	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
 	ENCODE_FRAME_POINTER
-	UNWIND_ESPFIX_STACK
 
 	/* fixup %gs */
 	GS_TO_REG %ecx
@@ -1492,9 +1524,9 @@ common_exception:
 	movl	%esp, %eax			# pt_regs pointer
 	CALL_NOSPEC %edi
 	jmp	ret_from_exception
-END(common_exception)
+SYM_CODE_END(common_exception)
 
-ENTRY(debug)
+SYM_CODE_START(debug)
 	/*
	 * Entry from sysenter is now handled in common_exception
	 */
@@ -1502,7 +1534,49 @@ ENTRY(debug)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	pushl	$do_debug
 	jmp	common_exception
-END(debug)
+SYM_CODE_END(debug)
+
+#ifdef CONFIG_DOUBLEFAULT
+SYM_CODE_START(double_fault)
+1:
+	/*
+	 * This is a task gate handler, not an interrupt gate handler.
+	 * The error code is on the stack, but the stack is otherwise
+	 * empty.  Interrupts are off.  Our state is sane with the following
+	 * exceptions:
+	 *
+	 *  - CR0.TS is set.  "TS" literally means "task switched".
+	 *  - EFLAGS.NT is set because we're a "nested task".
+	 *  - The doublefault TSS has back_link set and has been marked busy.
+	 *  - TR points to the doublefault TSS and the normal TSS is busy.
+	 *  - CR3 is the normal kernel PGD.  This would be delightful, except
+	 *    that the CPU didn't bother to save the old CR3 anywhere.  This
+	 *    would make it very awkward to return back to the context we came
+	 *    from.
+	 *
+	 * The rest of EFLAGS is sanitized for us, so we don't need to
+	 * worry about AC or DF.
+	 *
+	 * Don't even bother popping the error code.  It's always zero,
+	 * and ignoring it makes us a bit more robust against buggy
+	 * hypervisor task gate implementations.
+	 *
+	 * We will manually undo the task switch instead of doing a
+	 * task-switching IRET.
+	 */
+
+	clts				/* clear CR0.TS */
+	pushl	$X86_EFLAGS_FIXED
+	popfl				/* clear EFLAGS.NT */
+
+	call	doublefault_shim
+
+	/* We don't support returning, so we have no IRET here. */
+1:
+	hlt
+	jmp	1b
+SYM_CODE_END(double_fault)
+#endif
 
 /*
  * NMI is doubly nasty. It can happen on the first instruction of
@@ -1511,10 +1585,14 @@ END(debug)
  * switched stacks.  We handle both conditions by simply checking whether we
  * interrupted kernel code running on the SYSENTER stack.
  */
-ENTRY(nmi)
+SYM_CODE_START(nmi)
 	ASM_CLAC
 
 #ifdef CONFIG_X86_ESPFIX32
+	/*
+	 * ESPFIX_SS is only ever set on the return to user path
+	 * after we've switched to the entry stack.
+	 */
 	pushl	%eax
 	movl	%ss, %eax
 	cmpw	$__ESPFIX_SS, %ax
@@ -1550,6 +1628,11 @@ ENTRY(nmi)
 	movl	%ebx, %esp
 
 .Lnmi_return:
+#ifdef CONFIG_X86_ESPFIX32
+	testl	$CS_FROM_ESPFIX, PT_CS(%esp)
+	jnz	.Lnmi_from_espfix
+#endif
+
 	CHECK_AND_APPLY_ESPFIX
 	RESTORE_ALL_NMI cr3_reg=%edi pop=4
 	jmp	.Lirq_return
@@ -1557,28 +1640,47 @@ ENTRY(nmi)
 #ifdef CONFIG_X86_ESPFIX32
 .Lnmi_espfix_stack:
 	/*
-	 * create the pointer to lss back
+	 * Create the pointer to LSS back
	 */
 	pushl	%ss
 	pushl	%esp
 	addl	$4, (%esp)
-	/* copy the iret frame of 12 bytes */
-	.rept 3
-	pushl	16(%esp)
-	.endr
-	pushl	%eax
-	SAVE_ALL_NMI cr3_reg=%edi
+
+	/* Copy the (short) IRET frame */
+	pushl	4*4(%esp)	# flags
+	pushl	4*4(%esp)	# cs
+	pushl	4*4(%esp)	# ip
+
+	pushl	%eax		# orig_ax
+
+	SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1
 	ENCODE_FRAME_POINTER
-	FIXUP_ESPFIX_STACK			# %eax == %esp
+
+	/* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */
+	xorl	$(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp)
+
 	xorl	%edx, %edx			# zero error code
-	call	do_nmi
+	movl	%esp, %eax			# pt_regs pointer
+	jmp	.Lnmi_from_sysenter_stack
+
+.Lnmi_from_espfix:
 	RESTORE_ALL_NMI cr3_reg=%edi
-	lss	12+4(%esp), %esp		# back to espfix stack
+	/*
+	 * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to
+	 * fix up the gap and long frame:
+	 *
+	 *  3 - original frame	(exception)
+	 *  2 - ESPFIX block	(above)
+	 *  6 - gap		(FIXUP_FRAME)
+	 *  5 - long frame	(FIXUP_FRAME)
+	 *  1 - orig_ax
	 */
+	lss	(1+5+6)*4(%esp), %esp		# back to espfix stack
 	jmp	.Lirq_return
 #endif
-END(nmi)
+SYM_CODE_END(nmi)
 
-ENTRY(int3)
+SYM_CODE_START(int3)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 
@@ -1589,22 +1691,22 @@
 	movl	%esp, %eax			# pt_regs pointer
 	call	do_int3
 	jmp	ret_from_exception
-END(int3)
+SYM_CODE_END(int3)
 
-ENTRY(general_protection)
+SYM_CODE_START(general_protection)
 	pushl	$do_general_protection
 	jmp	common_exception
-END(general_protection)
+SYM_CODE_END(general_protection)
 
 #ifdef CONFIG_KVM_GUEST
-ENTRY(async_page_fault)
+SYM_CODE_START(async_page_fault)
 	ASM_CLAC
 	pushl	$do_async_page_fault
 	jmp	common_exception_read_cr2
-END(async_page_fault)
+SYM_CODE_END(async_page_fault)
 #endif
 
-ENTRY(rewind_stack_do_exit)
+SYM_CODE_START(rewind_stack_do_exit)
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl	%ebp, %ebp
 
@@ -1613,4 +1715,4 @@ ENTRY(rewind_stack_do_exit)
 	call	do_exit
 1:	jmp	1b
-END(rewind_stack_do_exit)
+SYM_CODE_END(rewind_stack_do_exit)
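For readers tracing the new CS_FROM_ESPFIX logic: the entry code stashes status flags in the high half of the saved CS dword, of which the hardware only defines the low 16 bits, and the NMI path flips two of those flags with a single xorl. Below is a minimal user-space C sketch of that encoding; the constants are copied from the diff, while cs_selector() and the example selector value 0x60 are illustrative assumptions, not kernel API.

#include <stdint.h>
#include <stdio.h>

#define CS_FROM_ENTRY_STACK (1u << 31)
#define CS_FROM_USER_CR3    (1u << 30)
#define CS_FROM_KERNEL      (1u << 29)
#define CS_FROM_ESPFIX      (1u << 28)

/* Only the low 16 bits of the slot hold the real segment selector;
 * the entry code masks with $0x0000ffff before trusting them. */
static uint16_t cs_selector(uint32_t cs_slot)
{
        return (uint16_t)(cs_slot & 0xffff);
}

int main(void)
{
        uint32_t slot = 0x60 | CS_FROM_KERNEL;  /* selector plus kernel flag */

        /* Mirrors the xorl in the NMI espfix path: one operation both
         * clears CS_FROM_KERNEL and sets CS_FROM_ESPFIX. */
        slot ^= CS_FROM_ESPFIX | CS_FROM_KERNEL;

        printf("selector=%#x espfix=%d kernel=%d\n", cs_selector(slot),
               !!(slot & CS_FROM_ESPFIX), !!(slot & CS_FROM_KERNEL));
        return 0;
}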
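The reason every hardcoded FIXUP_FRAME offset grows by one word is that SAVE_ALL now pushes %fs before the fixup runs. The sketch below models the frame with a plain C struct so the 4*4(%esp) arithmetic can be checked at compile time; struct fake_pt_regs is an illustrative stand-in for the kernel's pt_regs, not a copy of it.

#include <stddef.h>
#include <stdint.h>

struct fake_pt_regs {
        /* general registers, pushed last by SAVE_ALL */
        uint32_t bx, cx, dx, si, di, bp, ax;
        /* segment slots; fs is the new unconditional push */
        uint32_t ds, es, fs, gs;
        /* entry/hardware frame that FIXUP_FRAME completes */
        uint32_t orig_ax, ip, cs, flags, sp, ss;
};

int main(void)
{
        /* At FIXUP_FRAME time only fs..flags are on the stack, so cs
         * sits at 4*4(%esp): fs(0), gs(1), orig_ax(2), ip(3), cs(4). */
        _Static_assert(offsetof(struct fake_pt_regs, cs) -
                       offsetof(struct fake_pt_regs, fs) == 4 * 4,
                       "cs is four words above fs");
        return 0;
}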
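FIXUP_ESPFIX_STACK now locates the GDT with SGDT because user CR3 may still be live and percpu data unreachable; it then reads bytes 4 and 7 of the espfix descriptor, which hold bits 16..23 and 24..31 of the segment base. A hedged C model of that decoding follows, with invented names and the descriptor layout taken from the x86 architecture manuals.

#include <assert.h>
#include <stdint.h>

/* What sgdt (%esp) stores: a 16-bit limit followed by the 32-bit
 * linear base, which is why the asm reads the base at offset 2. */
struct fake_gdt_ptr {
        uint16_t limit;
        uint32_t base;
} __attribute__((packed));

/* An 8-byte descriptor scatters its 32-bit base across bytes 2-4 and 7. */
static uint32_t descriptor_base(const uint8_t d[8])
{
        return (uint32_t)d[2] | ((uint32_t)d[3] << 8) |
               ((uint32_t)d[4] << 16) | ((uint32_t)d[7] << 24);
}

/* The asm only needs bits 16..31 (mov +4 -> %al, mov +7 -> %ah,
 * shl $16), because the espfix base always has zero low bits. */
static uint32_t espfix_high_word(const uint8_t d[8])
{
        return (((uint32_t)d[7] << 8) | d[4]) << 16;
}

int main(void)
{
        uint8_t d[8] = {0};

        d[4] = 0xad;    /* base bits 16..23 */
        d[7] = 0xde;    /* base bits 24..31 */
        assert(descriptor_base(d) == 0xdead0000u);
        assert(espfix_high_word(d) == 0xdead0000u);
        return 0;
}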
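The closing lss operand in the NMI espfix exit is plain bookkeeping: after RESTORE_ALL_NMI, orig_ax (1 word), the long IRET frame (5 words), and the FIXUP_FRAME gap (6 words) still separate %esp from the ss:sp pair pushed earlier, so the displacement is (1+5+6)*4 = 48 bytes. A trivial arithmetic check, purely illustrative:

int main(void)
{
        enum { ORIG_AX = 1, LONG_FRAME = 5, GAP = 6, WORD = 4 };

        /* (1 + 5 + 6) * 4 == 48, the displacement used by
         *      lss (1+5+6)*4(%esp), %esp */
        return (ORIG_AX + LONG_FRAME + GAP) * WORD == 48 ? 0 : 1;
}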