diff options
Diffstat (limited to 'coregrind/m_syscall.c')
-rw-r--r-- | coregrind/m_syscall.c | 567 |
1 files changed, 567 insertions, 0 deletions
diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c new file mode 100644 index 0000000..676254a --- /dev/null +++ b/coregrind/m_syscall.c @@ -0,0 +1,567 @@ + +/*--------------------------------------------------------------------*/ +/*--- Doing syscalls. m_syscall.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "pub_core_basics.h" +#include "pub_core_vki.h" +#include "pub_core_vkiscnums.h" +#include "pub_core_syscall.h" + +/* --------------------------------------------------------------------- + Building syscall return values. + ------------------------------------------------------------------ */ + +/* Make a SysRes value from an syscall return value. This is + Linux-specific. + + From: + http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/ + linux/i386/sysdep.h? + rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc + + Linux uses a negative return value to indicate syscall errors, + unlike most Unices, which use the condition codes' carry flag. + + Since version 2.1 the return value of a system call might be + negative even if the call succeeded. E.g., the 'lseek' system call + might return a large offset. Therefore we must not anymore test + for < 0, but test for a real error by making sure the value in %eax + is a real error number. Linus said he will make sure the no + syscall returns a value in -1 .. -4095 as a valid result so we can + safely test with -4095. +*/ +SysRes VG_(mk_SysRes_x86_linux) ( UInt val ) { + SysRes res; + res.isError = val >= -4095 && val <= -1; + if (res.isError) { + res.err = -val; + res.res = 0; + } else { + res.err = 0; + res.res = val; + } + return res; +} + +/* Similarly .. */ +SysRes VG_(mk_SysRes_amd64_linux) ( ULong val ) { + SysRes res; + res.isError = val >= -4095 && val <= -1; + if (res.isError) { + res.err = -val; + res.res = 0; + } else { + res.err = 0; + res.res = val; + } + return res; +} + +/* PPC uses the CR7.SO bit to flag an error (CR0 in IBM-speak) */ +/* Note this must be in the bottom bit of the second arg */ +SysRes VG_(mk_SysRes_ppc32_linux) ( UInt val, UInt cr0so ) { + SysRes res; + res.isError = (cr0so & 1) != 0; + if (res.isError) { + res.err = val; + res.res = 0; + } else { + res.err = 0; + res.res = val; + } + return res; +} + +/* As per ppc32 version, cr0.so must be in l.s.b. of 2nd arg */ +SysRes VG_(mk_SysRes_ppc64_linux) ( ULong val, ULong cr0so ) { + SysRes res; + res.isError = (cr0so & 1) != 0; + if (res.isError) { + res.err = val; + res.res = 0; + } else { + res.err = 0; + res.res = val; + } + return res; +} + +/* AIX scheme: we have to record both 'res' (r3) and 'err' (r4). If + 'err' is nonzero then the call has failed, but it could still be + that AIX userspace will ignore 'err' and instead consult 'res' to + determine if the call failed. So we have to record both. */ +SysRes VG_(mk_SysRes_ppc32_aix5) ( UInt res, UInt err ) { + SysRes r; + r.res = res; + r.err = err; + r.isError = r.err != 0; + return r; +} + +SysRes VG_(mk_SysRes_ppc64_aix5) ( ULong res, ULong err ) { + SysRes r; + r.res = res; + r.err = err; + r.isError = r.err != 0; + return r; +} + +/* Generic constructors. */ +SysRes VG_(mk_SysRes_Error) ( UWord err ) { + SysRes r; + r.res = 0; + r.err = err; + r.isError = True; + return r; +} + +SysRes VG_(mk_SysRes_Success) ( UWord res ) { + SysRes r; + r.res = res; + r.err = 0; + r.isError = False; + return r; +} + + +/* --------------------------------------------------------------------- + A function for doing syscalls. + ------------------------------------------------------------------ */ + +#if defined(VGP_x86_linux) +/* Incoming args (syscall number + up to 6 args) come on the stack. + (ie. the C calling convention). + + The syscall number goes in %eax. The args are passed to the syscall in + the regs %ebx, %ecx, %edx, %esi, %edi, %ebp, ie. the kernel's syscall + calling convention. + + %eax gets the return value. Not sure which registers the kernel + clobbers, so we preserve all the callee-save regs (%esi, %edi, %ebx, + %ebp). +*/ +extern UWord do_syscall_WRK ( + UWord syscall_no, + UWord a1, UWord a2, UWord a3, + UWord a4, UWord a5, UWord a6 + ); +asm( +".text\n" +"do_syscall_WRK:\n" +" push %esi\n" +" push %edi\n" +" push %ebx\n" +" push %ebp\n" +" movl 16+ 4(%esp),%eax\n" +" movl 16+ 8(%esp),%ebx\n" +" movl 16+12(%esp),%ecx\n" +" movl 16+16(%esp),%edx\n" +" movl 16+20(%esp),%esi\n" +" movl 16+24(%esp),%edi\n" +" movl 16+28(%esp),%ebp\n" +" int $0x80\n" +" popl %ebp\n" +" popl %ebx\n" +" popl %edi\n" +" popl %esi\n" +" ret\n" +".previous\n" +); + +#elif defined(VGP_amd64_linux) +/* Incoming args (syscall number + up to 6 args) come in %rdi, %rsi, + %rdx, %rcx, %r8, %r9, and the last one on the stack (ie. the C + calling convention). + + The syscall number goes in %rax. The args are passed to the syscall in + the regs %rdi, %rsi, %rdx, %r10, %r8, %r9 (yes, really %r10, not %rcx), + ie. the kernel's syscall calling convention. + + %rax gets the return value. %rcx and %r11 are clobbered by the syscall; + no matter, they are caller-save (the syscall clobbers no callee-save + regs, so we don't have to do any register saving/restoring). +*/ +extern UWord do_syscall_WRK ( + UWord syscall_no, + UWord a1, UWord a2, UWord a3, + UWord a4, UWord a5, UWord a6 + ); +asm( +".text\n" +"do_syscall_WRK:\n" + /* Convert function calling convention --> syscall calling + convention */ +" movq %rdi, %rax\n" +" movq %rsi, %rdi\n" +" movq %rdx, %rsi\n" +" movq %rcx, %rdx\n" +" movq %r8, %r10\n" +" movq %r9, %r8\n" +" movq 8(%rsp), %r9\n" /* last arg from stack */ +" syscall\n" +" ret\n" +".previous\n" +); + +#elif defined(VGP_ppc32_linux) +/* Incoming args (syscall number + up to 6 args) come in %r3:%r9. + + The syscall number goes in %r0. The args are passed to the syscall in + the regs %r3:%r8, i.e. the kernel's syscall calling convention. + + The %cr0.so bit flags an error. + We return the syscall return value in %r3, and the %cr0.so in + the lowest bit of %r4. + We return a ULong, of which %r3 is the high word, and %r4 the low. + No callee-save regs are clobbered, so no saving/restoring is needed. +*/ +extern ULong do_syscall_WRK ( + UWord syscall_no, + UWord a1, UWord a2, UWord a3, + UWord a4, UWord a5, UWord a6 + ); +asm( +".text\n" +"do_syscall_WRK:\n" +" mr 0,3\n" +" mr 3,4\n" +" mr 4,5\n" +" mr 5,6\n" +" mr 6,7\n" +" mr 7,8\n" +" mr 8,9\n" +" sc\n" /* syscall: sets %cr0.so on error */ +" mfcr 4\n" /* %cr -> low word of return var */ +" rlwinm 4,4,4,31,31\n" /* rotate flag bit so to lsb, and mask it */ +" blr\n" /* and return */ +".previous\n" +); + +#elif defined(VGP_ppc64_linux) +/* Due to the need to return 65 bits of result, this is completely + different from the ppc32 case. The single arg register points to a + 7-word block containing the syscall # and the 6 args. The syscall + result proper is put in [0] of the block, and %cr0.so is in the + bottom but of [1]. */ +extern void do_syscall_WRK ( ULong* argblock ); +asm( +".align 2\n" +".globl do_syscall_WRK\n" +".section \".opd\",\"aw\"\n" +".align 3\n" +"do_syscall_WRK:\n" +".quad .do_syscall_WRK,.TOC.@tocbase,0\n" +".previous\n" +".type .do_syscall_WRK,@function\n" +".globl .do_syscall_WRK\n" +".do_syscall_WRK:\n" +" std 3,-16(1)\n" /* stash arg */ +" ld 8, 48(3)\n" /* sc arg 6 */ +" ld 7, 40(3)\n" /* sc arg 5 */ +" ld 6, 32(3)\n" /* sc arg 4 */ +" ld 5, 24(3)\n" /* sc arg 3 */ +" ld 4, 16(3)\n" /* sc arg 2 */ +" ld 0, 0(3)\n" /* sc number */ +" ld 3, 8(3)\n" /* sc arg 1 */ +" sc\n" /* result in r3 and cr0.so */ +" ld 5,-16(1)\n" /* reacquire argblock ptr (r5 is caller-save) */ +" std 3,0(5)\n" /* argblock[0] = r3 */ +" mfcr 3\n" +" srwi 3,3,28\n" +" andi. 3,3,1\n" +" std 3,8(5)\n" /* argblock[1] = cr0.s0 & 1 */ +" blr\n" +); + +#elif defined(VGP_ppc32_aix5) +static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4, + UWord sysno, + UWord a1, UWord a2, UWord a3, + UWord a4, UWord a5, UWord a6, + UWord a7, UWord a8 ) +{ + /* Syscalls on AIX are very similar to function calls: + - up to 8 args in r3-r10 + - syscall number in r2 + - kernel resumes at 'lr', so must set it appropriately beforehand + - r3 holds the result and r4 any applicable error code + See http://www.cs.utexas.edu/users/cart/publications/tr00-04.ps + and also 'man truss'. + */ + /* For some reason gcc-3.3.2 doesn't preserve r31 across the asm + even though we state it to be trashed. So use r27 instead. */ + UWord args[9]; + args[0] = sysno; + args[1] = a1; args[2] = a2; + args[3] = a3; args[4] = a4; + args[5] = a5; args[6] = a6; + args[7] = a7; args[8] = a8; + + __asm__ __volatile__( + + // establish base ptr + "mr 28,%0\n\t" + + // save r2, lr + "mr 27,2\n\t" // save r2 in r27 + "mflr 30\n\t" // save lr in r30 + + // set syscall number and args + "lwz 2, 0(28)\n\t" + "lwz 3, 4(28)\n\t" + "lwz 4, 8(28)\n\t" + "lwz 5, 12(28)\n\t" + "lwz 6, 16(28)\n\t" + "lwz 7, 20(28)\n\t" + "lwz 8, 24(28)\n\t" + "lwz 9, 28(28)\n\t" + "lwz 10, 32(28)\n\t" + + // set bit 3 of CR1 otherwise AIX 5.1 returns to the + // wrong address after the sc instruction + "crorc 6,6,6\n\t" + + // set up LR to point just after the sc insn + ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn + "mflr 29\n\t" + "addi 29,29,16\n\t" + "mtlr 29\n\t" + + // do it! + "sc\n\t" + + // result is now in r3; save it in args[0] + "stw 3,0(28)\n\t" + // error code in r4; save it in args[1] + "stw 4,4(28)\n\t" + + // restore + "mr 2,27\n\t" + "mtlr 30\n\t" + + : /*out*/ + : /*in*/ "b" (&args[0]) + : /*trash*/ + /*temps*/ "r31","r30","r29","r28","r27", + /*args*/ "r3","r4","r5","r6","r7","r8","r9","r10", + /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13", + "xer","ctr","cr0","cr1","cr2","cr3", + "cr4","cr5","cr6","cr7" + ); + + *res_r3 = args[0]; + *res_r4 = args[1]; +} + +#elif defined(VGP_ppc64_aix5) +static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4, + UWord sysno, + UWord a1, UWord a2, UWord a3, + UWord a4, UWord a5, UWord a6, + UWord a7, UWord a8 ) +{ + /* Same scheme as ppc32-aix5. */ + UWord args[9]; + args[0] = sysno; + args[1] = a1; args[2] = a2; + args[3] = a3; args[4] = a4; + args[5] = a5; args[6] = a6; + args[7] = a7; args[8] = a8; + + __asm__ __volatile__( + + // establish base ptr + "mr 28,%0\n\t" + + // save r2, lr + "mr 27,2\n\t" // save r2 in r27 + "mflr 30\n\t" // save lr in r30 + + // set syscall number and args + "ld 2, 0(28)\n\t" + "ld 3, 8(28)\n\t" + "ld 4, 16(28)\n\t" + "ld 5, 24(28)\n\t" + "ld 6, 32(28)\n\t" + "ld 7, 40(28)\n\t" + "ld 8, 48(28)\n\t" + "ld 9, 56(28)\n\t" + "ld 10, 64(28)\n\t" + + // set bit 3 of CR1 otherwise AIX 5.1 returns to the + // wrong address after the sc instruction + "crorc 6,6,6\n\t" + + // set up LR to point just after the sc insn + ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn + "mflr 29\n\t" + "addi 29,29,16\n\t" + "mtlr 29\n\t" + + // do it! + "sc\n\t" + + // result is now in r3; save it in args[0] + "std 3,0(28)\n\t" + // error code in r4; save it in args[1] + "std 4,8(28)\n\t" + + // restore + "mr 2,27\n\t" + "mtlr 30\n\t" + + : /*out*/ + : /*in*/ "b" (&args[0]) + : /*trash*/ + /*temps*/ "r31","r30","r29","r28","r27", + /*args*/ "r3","r4","r5","r6","r7","r8","r9","r10", + /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13", + "xer","ctr","cr0","cr1","cr2","cr3", + "cr4","cr5","cr6","cr7" + ); + + *res_r3 = args[0]; + *res_r4 = args[1]; +} + +#else +# error Unknown platform +#endif + + +SysRes VG_(do_syscall) ( UWord sysno, UWord a1, UWord a2, UWord a3, + UWord a4, UWord a5, UWord a6, + UWord a7, UWord a8 ) +{ +#if defined(VGP_x86_linux) + UWord val = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6); + return VG_(mk_SysRes_x86_linux)( val ); + +#elif defined(VGP_amd64_linux) + UWord val = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6); + return VG_(mk_SysRes_amd64_linux)( val ); + +#elif defined(VGP_ppc32_linux) + ULong ret = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6); + UInt val = (UInt)(ret>>32); + UInt cr0so = (UInt)(ret); + return VG_(mk_SysRes_ppc32_linux)( val, cr0so ); + +#elif defined(VGP_ppc64_linux) + ULong argblock[7]; + argblock[0] = sysno; + argblock[1] = a1; + argblock[2] = a2; + argblock[3] = a3; + argblock[4] = a4; + argblock[5] = a5; + argblock[6] = a6; + do_syscall_WRK( &argblock[0] ); + return VG_(mk_SysRes_ppc64_linux)( argblock[0], argblock[1] ); + +#elif defined(VGP_ppc32_aix5) + UWord res; + UWord err; + do_syscall_WRK( &res, &err, + sysno, a1, a2, a3, a4, a5, a6, a7, a8); + /* Try to set the error number to zero if the syscall hasn't + really failed. */ + if (sysno == __NR_AIX5_kread + || sysno == __NR_AIX5_kwrite) { + if (res != (UWord)-1L) + err = 0; + } + else if (sysno == __NR_AIX5_sigprocmask + || sysno == __NR_AIX5__sigpending) { + if (res == 0) + err = 0; + } + + return VG_(mk_SysRes_ppc32_aix5)( res, err ); + +#elif defined(VGP_ppc64_aix5) + UWord res; + UWord err; + do_syscall_WRK( &res, &err, + sysno, a1, a2, a3, a4, a5, a6, a7, a8); + /* Try to set the error number to zero if the syscall hasn't + really failed. */ + if (sysno == __NR_AIX5_kread + || sysno == __NR_AIX5_kwrite) { + if (res != (UWord)-1L) + err = 0; + } + else if (sysno == __NR_AIX5_sigprocmask + || sysno == __NR_AIX5__sigpending) { + if (res == 0) + err = 0; + } + + return VG_(mk_SysRes_ppc64_aix5)( res, err ); + +#else +# error Unknown platform +#endif +} + +/* --------------------------------------------------------------------- + Names of errors. + ------------------------------------------------------------------ */ + +/* Return a string which gives the name of an error value. Note, + unlike the standard C syserror fn, the returned string is not + malloc-allocated or writable -- treat it as a constant. + TODO: implement this properly. */ + +const HChar* VG_(strerror) ( UWord errnum ) +{ + switch (errnum) { + case VKI_EPERM: return "Operation not permitted"; + case VKI_ENOENT: return "No such file or directory"; + case VKI_ESRCH: return "No such process"; + case VKI_EINTR: return "Interrupted system call"; + case VKI_EBADF: return "Bad file number"; + case VKI_EAGAIN: return "Try again"; + case VKI_ENOMEM: return "Out of memory"; + case VKI_EACCES: return "Permission denied"; + case VKI_EFAULT: return "Bad address"; + case VKI_EEXIST: return "File exists"; + case VKI_EINVAL: return "Invalid argument"; + case VKI_EMFILE: return "Too many open files"; + case VKI_ENOSYS: return "Function not implemented"; + case VKI_EOVERFLOW: return "Value too large for defined data type"; + case VKI_ERESTARTSYS: return "ERESTARTSYS"; + default: return "VG_(strerror): unknown error"; + } +} + + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ |