diff options
author | Denys Vlasenko <dvlasenk@redhat.com> | 2015-02-26 14:40:27 -0800 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-03-04 22:50:49 +0100 |
commit | 76f5df43cab5e765c0bd42289103e8f625813ae1 (patch) | |
tree | f83868c1222fe87789a149bd66ae726f919a9f3d /arch/x86/ia32 | |
parent | 6e1327bd2b20ccb387fcddc0caa605cb253cc458 (diff) |
x86/asm/entry/64: Always allocate a complete "struct pt_regs" on the kernel stack
The 64-bit entry code was using six stack slots less by not
saving/restoring registers which are callee-preserved according
to the C ABI, and was not allocating space for them.
Only when syscalls needed a complete "struct pt_regs" was
the complete area allocated and filled in.
As an additional twist, on interrupt entry a "slightly less
truncated pt_regs" trick is used, to make nested interrupt
stacks easier to unwind.
This proved to be a source of significant obfuscation and subtle
bugs. For example, 'stub_fork' had to pop the return address,
extend the struct, save registers, and push return address back.
Ugly. 'ia32_ptregs_common' pops return address and "returns" via
jmp insn, throwing a wrench into CPU return stack cache.
This patch changes the code to always allocate a complete
"struct pt_regs" on the kernel stack. The saving of registers
is still done lazily.
"Partial pt_regs" trick on interrupt stack is retained.
Macros which manipulate "struct pt_regs" on stack are reworked:
- ALLOC_PT_GPREGS_ON_STACK allocates the structure.
- SAVE_C_REGS saves to it those registers which are clobbered
by C code.
- SAVE_EXTRA_REGS saves to it all other registers.
- Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros
reverse it.
'ia32_ptregs_common', 'stub_fork' and friends lost their ugly dance
with the return pointer.
LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.
'error_entry' and 'save_paranoid' now use SAVE_C_REGS +
SAVE_EXTRA_REGS instead of having it open-coded yet again.
Patch was run-tested: 64-bit executables, 32-bit executables,
strace works.
Timing tests did not show measurable difference in 32-bit
and 64-bit syscalls.
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1423778052-21038-2-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/b89763d354aa23e670b9bdf3a40ae320320a7c2e.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/ia32')
-rw-r--r-- | arch/x86/ia32/ia32entry.S | 47 |
1 files changed, 25 insertions, 22 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 156ebcab4ada..f4bed4971673 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -62,12 +62,12 @@ */ .macro LOAD_ARGS32 offset, _r9=0 .if \_r9 - movl \offset+16(%rsp),%r9d + movl \offset+R9(%rsp),%r9d .endif - movl \offset+40(%rsp),%ecx - movl \offset+48(%rsp),%edx - movl \offset+56(%rsp),%esi - movl \offset+64(%rsp),%edi + movl \offset+RCX(%rsp),%ecx + movl \offset+RDX(%rsp),%edx + movl \offset+RSI(%rsp),%esi + movl \offset+RDI(%rsp),%edi movl %eax,%eax /* zero extension */ .endm @@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target) CFI_REL_OFFSET rip,0 pushq_cfi %rax cld - SAVE_ARGS 0,1,0 + ALLOC_PT_GPREGS_ON_STACK + SAVE_C_REGS_EXCEPT_R891011 /* no need to do an access_ok check here because rbp has been 32bit zero extended */ ASM_STAC @@ -182,7 +183,8 @@ sysexit_from_sys_call: andl $~0x200,EFLAGS-ARGOFFSET(%rsp) movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ CFI_REGISTER rip,rdx - RESTORE_ARGS 0,24,0,0,0,0 + RESTORE_RSI_RDI + REMOVE_PT_GPREGS_FROM_STACK 3*8 xorq %r8,%r8 xorq %r9,%r9 xorq %r10,%r10 @@ -256,13 +258,13 @@ sysenter_tracesys: testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jz sysenter_auditsys #endif - SAVE_REST + SAVE_EXTRA_REGS CLEAR_RREGS movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ - RESTORE_REST + RESTORE_EXTRA_REGS cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call @@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target) * disabled irqs and here we enable it straight after entry: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,0,0 + ALLOC_PT_GPREGS_ON_STACK 8 + SAVE_C_REGS_EXCEPT_RCX_R891011 movl %eax,%eax /* zero extension */ movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) @@ -341,7 +344,7 @@ cstar_dispatch: jnz sysretl_audit sysretl_from_sys_call: andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) - RESTORE_ARGS 0,-ARG_SKIP,0,0,0 + RESTORE_RSI_RDI_RDX movl RIP-ARGOFFSET(%rsp),%ecx CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d @@ -372,13 +375,13 @@ cstar_tracesys: jz cstar_auditsys #endif xchgl %r9d,%ebp - SAVE_REST + SAVE_EXTRA_REGS CLEAR_RREGS 0, r9 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ - RESTORE_REST + RESTORE_EXTRA_REGS xchgl %ebp,%r9d cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ @@ -433,7 +436,8 @@ ENTRY(ia32_syscall) cld /* note the registers are not zero extended to the sf. this could be a problem. */ - SAVE_ARGS 0,1,0 + ALLOC_PT_GPREGS_ON_STACK + SAVE_C_REGS_EXCEPT_R891011 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz ia32_tracesys @@ -446,16 +450,16 @@ ia32_sysret: movq %rax,RAX-ARGOFFSET(%rsp) ia32_ret_from_sys_call: CLEAR_RREGS -ARGOFFSET - jmp int_ret_from_sys_call + jmp int_ret_from_sys_call -ia32_tracesys: - SAVE_REST +ia32_tracesys: + SAVE_EXTRA_REGS CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ - RESTORE_REST + RESTORE_EXTRA_REGS cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call @@ -492,7 +496,6 @@ GLOBAL(stub32_clone) ALIGN ia32_ptregs_common: - popq %r11 CFI_ENDPROC CFI_STARTPROC32 simple CFI_SIGNAL_FRAME @@ -507,9 +510,9 @@ ia32_ptregs_common: /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ CFI_REL_OFFSET rsp,RSP-ARGOFFSET /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ - SAVE_REST + SAVE_EXTRA_REGS 8 call *%rax - RESTORE_REST - jmp ia32_sysret /* misbalances the return cache */ + RESTORE_EXTRA_REGS 8 + ret CFI_ENDPROC END(ia32_ptregs_common) |