summaryrefslogtreecommitdiff
path: root/coregrind/m_syscall.c
diff options
context:
space:
mode:
Diffstat (limited to 'coregrind/m_syscall.c')
-rw-r--r--coregrind/m_syscall.c567
1 files changed, 567 insertions, 0 deletions
diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c
new file mode 100644
index 0000000..676254a
--- /dev/null
+++ b/coregrind/m_syscall.c
@@ -0,0 +1,567 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Doing syscalls. m_syscall.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2000-2009 Julian Seward
+ jseward@acm.org
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_core_basics.h"
+#include "pub_core_vki.h"
+#include "pub_core_vkiscnums.h"
+#include "pub_core_syscall.h"
+
+/* ---------------------------------------------------------------------
+ Building syscall return values.
+ ------------------------------------------------------------------ */
+
+/* Make a SysRes value from a syscall return value. This is
+ Linux-specific.
+
+ From:
+ http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/
+ linux/i386/sysdep.h?
+ rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc
+
+ Linux uses a negative return value to indicate syscall errors,
+ unlike most Unices, which use the condition codes' carry flag.
+
+ Since version 2.1 the return value of a system call might be
+ negative even if the call succeeded. E.g., the 'lseek' system call
+ might return a large offset. Therefore we must not anymore test
+ for < 0, but test for a real error by making sure the value in %eax
+ is a real error number. Linus said he will make sure that no
+ syscall returns a value in -1 .. -4095 as a valid result so we can
+ safely test with -4095.
+*/
+SysRes VG_(mk_SysRes_x86_linux) ( UInt val ) {
+   SysRes out;   /* seen as unsigned, -4095 .. -1 is the top of the UInt range */
+   out.isError = val >= (UInt)(-4095) && val <= (UInt)(-1);
+   if (!out.isError) {
+      out.res = val;    /* success: pass the raw value through */
+      out.err = 0;
+   } else {
+      out.err = -val;   /* failure: the error number is the negated value */
+      out.res = 0;
+   }
+   return out;
+}
+
+/* Similarly .. */
+SysRes VG_(mk_SysRes_amd64_linux) ( ULong val ) {
+   SysRes out;   /* seen as unsigned, -4095 .. -1 is the top of the ULong range */
+   out.isError = val >= (ULong)(-4095) && val <= (ULong)(-1);
+   if (!out.isError) {
+      out.res = val;    /* success: pass the raw value through */
+      out.err = 0;
+   } else {
+      out.err = -val;   /* failure: the error number is the negated value */
+      out.res = 0;
+   }
+   return out;
+}
+
+/* PPC uses the CR7.SO bit to flag an error (CR0 in IBM-speak) */
+/* Note this must be in the bottom bit of the second arg */
+SysRes VG_(mk_SysRes_ppc32_linux) ( UInt val, UInt cr0so ) {
+   SysRes out;
+   out.isError = (cr0so & 1) != 0;   /* only the l.s.b. of cr0so is looked at */
+   if (!out.isError) {
+      out.res = val;                 /* flag clear: val is the result */
+      out.err = 0;
+   } else {
+      out.err = val;                 /* flag set: val is the error number */
+      out.res = 0;
+   }
+   return out;
+}
+
+/* As per ppc32 version, cr0.so must be in l.s.b. of 2nd arg */
+SysRes VG_(mk_SysRes_ppc64_linux) ( ULong val, ULong cr0so ) {
+   SysRes out;
+   out.isError = (cr0so & 1) != 0;   /* only the l.s.b. of cr0so is looked at */
+   if (!out.isError) {
+      out.res = val;                 /* flag clear: val is the result */
+      out.err = 0;
+   } else {
+      out.err = val;                 /* flag set: val is the error number */
+      out.res = 0;
+   }
+   return out;
+}
+
+/* AIX scheme: we have to record both 'res' (r3) and 'err' (r4). If
+ 'err' is nonzero then the call has failed, but it could still be
+ that AIX userspace will ignore 'err' and instead consult 'res' to
+ determine if the call failed. So we have to record both. */
+SysRes VG_(mk_SysRes_ppc32_aix5) ( UInt res, UInt err ) {
+   SysRes out;
+   out.err = err;
+   out.isError = out.err != 0;   /* a nonzero error code alone marks failure */
+   out.res = res;                /* recorded whether or not the call failed */
+   return out;
+}
+
+SysRes VG_(mk_SysRes_ppc64_aix5) ( ULong res, ULong err ) {
+   SysRes out;
+   out.err = err;
+   out.isError = out.err != 0;   /* a nonzero error code alone marks failure */
+   out.res = res;                /* recorded whether or not the call failed */
+   return out;
+}
+
+/* Generic constructors. */
+SysRes VG_(mk_SysRes_Error) ( UWord err ) {
+   SysRes failure;            /* a failed result carrying error number 'err' */
+   failure.isError = True;
+   failure.err = err;
+   failure.res = 0;           /* no result value accompanies a failure */
+   return failure;
+}
+
+SysRes VG_(mk_SysRes_Success) ( UWord res ) {
+   SysRes success;            /* a successful result carrying value 'res' */
+   success.isError = False;
+   success.err = 0;           /* no error number accompanies a success */
+   success.res = res;
+   return success;
+}
+
+
+/* ---------------------------------------------------------------------
+ A function for doing syscalls.
+ ------------------------------------------------------------------ */
+
+#if defined(VGP_x86_linux)
+/* Incoming args (syscall number + up to 6 args) come on the stack.
+ (ie. the C calling convention).
+
+ The syscall number goes in %eax. The args are passed to the syscall in
+ the regs %ebx, %ecx, %edx, %esi, %edi, %ebp, ie. the kernel's syscall
+ calling convention.
+
+ %eax gets the return value. Not sure which registers the kernel
+ clobbers, so we preserve all the callee-save regs (%esi, %edi, %ebx,
+ %ebp).
+*/
+extern UWord do_syscall_WRK (   /* x86-linux worker: one int $0x80 syscall, %eax handed back as-is */
+ UWord syscall_no,
+ UWord a1, UWord a2, UWord a3,
+ UWord a4, UWord a5, UWord a6
+ );
+asm(
+".text\n"
+"do_syscall_WRK:\n"
+" push %esi\n" /* save the four callee-save regs: 4 x 4 = 16 bytes of stack */
+" push %edi\n"
+" push %ebx\n"
+" push %ebp\n"
+" movl 16+ 4(%esp),%eax\n" /* syscall_no; 16 skips the saved regs, 4 the return address */
+" movl 16+ 8(%esp),%ebx\n" /* a1 */
+" movl 16+12(%esp),%ecx\n" /* a2 */
+" movl 16+16(%esp),%edx\n" /* a3 */
+" movl 16+20(%esp),%esi\n" /* a4 */
+" movl 16+24(%esp),%edi\n" /* a5 */
+" movl 16+28(%esp),%ebp\n" /* a6 */
+" int $0x80\n" /* enter the kernel; the return value arrives in %eax */
+" popl %ebp\n" /* restore the saved regs in reverse push order */
+" popl %ebx\n"
+" popl %edi\n"
+" popl %esi\n"
+" ret\n" /* %eax is untouched since the int, so it is the function result */
+".previous\n"
+);
+
+#elif defined(VGP_amd64_linux)
+/* Incoming args (syscall number + up to 6 args) come in %rdi, %rsi,
+ %rdx, %rcx, %r8, %r9, and the last one on the stack (ie. the C
+ calling convention).
+
+ The syscall number goes in %rax. The args are passed to the syscall in
+ the regs %rdi, %rsi, %rdx, %r10, %r8, %r9 (yes, really %r10, not %rcx),
+ ie. the kernel's syscall calling convention.
+
+ %rax gets the return value. %rcx and %r11 are clobbered by the syscall;
+ no matter, they are caller-save (the syscall clobbers no callee-save
+ regs, so we don't have to do any register saving/restoring).
+*/
+extern UWord do_syscall_WRK (   /* amd64-linux worker: one 'syscall' insn, %rax handed back as-is */
+ UWord syscall_no,
+ UWord a1, UWord a2, UWord a3,
+ UWord a4, UWord a5, UWord a6
+ );
+asm(
+".text\n"
+"do_syscall_WRK:\n"
+ /* Convert function calling convention --> syscall calling convention.
+ Each source register is read before a later move overwrites it. */
+" movq %rdi, %rax\n" /* syscall_no */
+" movq %rsi, %rdi\n" /* a1 */
+" movq %rdx, %rsi\n" /* a2 */
+" movq %rcx, %rdx\n" /* a3 */
+" movq %r8, %r10\n" /* a4 goes in %r10, not %rcx */
+" movq %r9, %r8\n" /* a5 */
+" movq 8(%rsp), %r9\n" /* last arg (a6) from stack, just above the return address */
+" syscall\n" /* the return value arrives in %rax */
+" ret\n" /* %rax is untouched since the syscall, so it is the function result */
+".previous\n"
+);
+
+#elif defined(VGP_ppc32_linux)
+/* Incoming args (syscall number + up to 6 args) come in %r3:%r9.
+
+ The syscall number goes in %r0. The args are passed to the syscall in
+ the regs %r3:%r8, i.e. the kernel's syscall calling convention.
+
+ The %cr0.so bit flags an error.
+ We return the syscall return value in %r3, and the %cr0.so in
+ the lowest bit of %r4.
+ We return a ULong, of which %r3 is the high word, and %r4 the low.
+ No callee-save regs are clobbered, so no saving/restoring is needed.
+*/
+extern ULong do_syscall_WRK (   /* ppc32-linux worker: result in %r3 (high word), cr0.so in bit 0 of %r4 (low word) */
+ UWord syscall_no,
+ UWord a1, UWord a2, UWord a3,
+ UWord a4, UWord a5, UWord a6
+ );
+asm(
+".text\n"
+"do_syscall_WRK:\n"
+" mr 0,3\n" /* r0 = syscall_no */
+" mr 3,4\n" /* r3 = a1; every arg moves down one register */
+" mr 4,5\n" /* r4 = a2 */
+" mr 5,6\n" /* r5 = a3 */
+" mr 6,7\n" /* r6 = a4 */
+" mr 7,8\n" /* r7 = a5 */
+" mr 8,9\n" /* r8 = a6 */
+" sc\n" /* syscall: sets %cr0.so on error */
+" mfcr 4\n" /* %cr -> low word of return var */
+" rlwinm 4,4,4,31,31\n" /* rotate the cr0.so flag bit round to the lsb, and mask off the rest */
+" blr\n" /* and return */
+".previous\n"
+);
+
+#elif defined(VGP_ppc64_linux)
+/* Due to the need to return 65 bits of result, this is completely
+ different from the ppc32 case. The single arg register points to a
+ 7-word block containing the syscall # and the 6 args. The syscall
+ result proper is put in [0] of the block, and %cr0.so is in the
+ bottom bit of [1]. */
+extern void do_syscall_WRK ( ULong* argblock ); /* ppc64-linux worker: reads argblock[0..6], writes argblock[0..1] */
+asm(
+".align 2\n"
+".globl do_syscall_WRK\n"
+".section \".opd\",\"aw\"\n"
+".align 3\n"
+"do_syscall_WRK:\n"
+".quad .do_syscall_WRK,.TOC.@tocbase,0\n" /* .opd entry for the plain name: code address, TOC base, 0 */
+".previous\n"
+".type .do_syscall_WRK,@function\n"
+".globl .do_syscall_WRK\n"
+".do_syscall_WRK:\n" /* the code itself lives under the dotted name */
+" std 3,-16(1)\n" /* stash arg (argblock ptr) at -16 off r1; r3 is about to be reused */
+" ld 8, 48(3)\n" /* sc arg 6 */
+" ld 7, 40(3)\n" /* sc arg 5 */
+" ld 6, 32(3)\n" /* sc arg 4 */
+" ld 5, 24(3)\n" /* sc arg 3 */
+" ld 4, 16(3)\n" /* sc arg 2 */
+" ld 0, 0(3)\n" /* sc number */
+" ld 3, 8(3)\n" /* sc arg 1; loaded last because it overwrites the base pointer in r3 */
+" sc\n" /* result in r3 and cr0.so */
+" ld 5,-16(1)\n" /* reacquire argblock ptr (r5 is caller-save) */
+" std 3,0(5)\n" /* argblock[0] = r3 */
+" mfcr 3\n" /* r3 = the whole condition register */
+" srwi 3,3,28\n" /* bring bit 28 (counting from the lsb) down to bit 0 */
+" andi. 3,3,1\n" /* keep only that bit */
+" std 3,8(5)\n" /* argblock[1] = cr0.so & 1 */
+" blr\n"
+);
+
+#elif defined(VGP_ppc32_aix5)
+static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4,
+ UWord sysno,
+ UWord a1, UWord a2, UWord a3,
+ UWord a4, UWord a5, UWord a6,
+ UWord a7, UWord a8 )
+{ /* ppc32-aix5 worker: performs one syscall and stores r3 (result) and r4 (error code) through res_r3 / res_r4 */
+ /* Syscalls on AIX are very similar to function calls:
+ - up to 8 args in r3-r10
+ - syscall number in r2
+ - kernel resumes at 'lr', so must set it appropriately beforehand
+ - r3 holds the result and r4 any applicable error code
+ See http://www.cs.utexas.edu/users/cart/publications/tr00-04.ps
+ and also 'man truss'.
+ */
+ /* For some reason gcc-3.3.2 doesn't preserve r31 across the asm
+ even though we state it to be trashed. So use r27 instead. */
+ UWord args[9]; // in: [0] = sysno, [1..8] = a1..a8; out: [0] = r3, [1] = r4
+ args[0] = sysno;
+ args[1] = a1; args[2] = a2;
+ args[3] = a3; args[4] = a4;
+ args[5] = a5; args[6] = a6;
+ args[7] = a7; args[8] = a8;
+
+ __asm__ __volatile__(
+
+ // establish base ptr
+ "mr 28,%0\n\t" // r28 = &args[0]
+
+ // save r2, lr
+ "mr 27,2\n\t" // save r2 in r27
+ "mflr 30\n\t" // save lr in r30
+
+ // set syscall number and args
+ "lwz 2, 0(28)\n\t" // r2 = sysno
+ "lwz 3, 4(28)\n\t" // r3 = a1
+ "lwz 4, 8(28)\n\t" // r4 = a2
+ "lwz 5, 12(28)\n\t" // r5 = a3
+ "lwz 6, 16(28)\n\t" // r6 = a4
+ "lwz 7, 20(28)\n\t" // r7 = a5
+ "lwz 8, 24(28)\n\t" // r8 = a6
+ "lwz 9, 28(28)\n\t" // r9 = a7
+ "lwz 10, 32(28)\n\t" // r10 = a8
+
+ // set bit 3 of CR1 otherwise AIX 5.1 returns to the
+ // wrong address after the sc instruction
+ "crorc 6,6,6\n\t"
+
+ // set up LR to point just after the sc insn
+ ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn
+ "mflr 29\n\t" // r29 = address of this mflr
+ "addi 29,29,16\n\t" // step over mflr, addi, mtlr, sc: 4 insns of 4 bytes
+ "mtlr 29\n\t"
+
+ // do it!
+ "sc\n\t"
+
+ // result is now in r3; save it in args[0]
+ "stw 3,0(28)\n\t"
+ // error code in r4; save it in args[1]
+ "stw 4,4(28)\n\t"
+
+ // restore
+ "mr 2,27\n\t" // r2 back from r27
+ "mtlr 30\n\t" // lr back from r30
+
+ : /*out*/
+ : /*in*/ "b" (&args[0])
+ : /*trash*/
+ /*temps*/ "r31","r30","r29","r28","r27",
+ /*args*/ "r3","r4","r5","r6","r7","r8","r9","r10",
+ /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13",
+ "xer","ctr","cr0","cr1","cr2","cr3",
+ "cr4","cr5","cr6","cr7"
+ );
+
+ *res_r3 = args[0]; // r3 as stored by the asm above
+ *res_r4 = args[1]; // r4 as stored by the asm above
+}
+
+#elif defined(VGP_ppc64_aix5)
+static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4,
+ UWord sysno,
+ UWord a1, UWord a2, UWord a3,
+ UWord a4, UWord a5, UWord a6,
+ UWord a7, UWord a8 )
+{ /* ppc64-aix5 worker: performs one syscall and stores r3 (result) and r4 (error code) through res_r3 / res_r4 */
+ /* Same scheme as ppc32-aix5, but with 8-byte loads/stores and slot offsets. */
+ UWord args[9]; // in: [0] = sysno, [1..8] = a1..a8; out: [0] = r3, [1] = r4
+ args[0] = sysno;
+ args[1] = a1; args[2] = a2;
+ args[3] = a3; args[4] = a4;
+ args[5] = a5; args[6] = a6;
+ args[7] = a7; args[8] = a8;
+
+ __asm__ __volatile__(
+
+ // establish base ptr
+ "mr 28,%0\n\t" // r28 = &args[0]
+
+ // save r2, lr
+ "mr 27,2\n\t" // save r2 in r27
+ "mflr 30\n\t" // save lr in r30
+
+ // set syscall number and args
+ "ld 2, 0(28)\n\t" // r2 = sysno
+ "ld 3, 8(28)\n\t" // r3 = a1
+ "ld 4, 16(28)\n\t" // r4 = a2
+ "ld 5, 24(28)\n\t" // r5 = a3
+ "ld 6, 32(28)\n\t" // r6 = a4
+ "ld 7, 40(28)\n\t" // r7 = a5
+ "ld 8, 48(28)\n\t" // r8 = a6
+ "ld 9, 56(28)\n\t" // r9 = a7
+ "ld 10, 64(28)\n\t" // r10 = a8
+
+ // set bit 3 of CR1 otherwise AIX 5.1 returns to the
+ // wrong address after the sc instruction
+ "crorc 6,6,6\n\t"
+
+ // set up LR to point just after the sc insn
+ ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn
+ "mflr 29\n\t" // r29 = address of this mflr
+ "addi 29,29,16\n\t" // step over mflr, addi, mtlr, sc: 4 insns of 4 bytes
+ "mtlr 29\n\t"
+
+ // do it!
+ "sc\n\t"
+
+ // result is now in r3; save it in args[0]
+ "std 3,0(28)\n\t"
+ // error code in r4; save it in args[1]
+ "std 4,8(28)\n\t"
+
+ // restore
+ "mr 2,27\n\t" // r2 back from r27
+ "mtlr 30\n\t" // lr back from r30
+
+ : /*out*/
+ : /*in*/ "b" (&args[0])
+ : /*trash*/
+ /*temps*/ "r31","r30","r29","r28","r27",
+ /*args*/ "r3","r4","r5","r6","r7","r8","r9","r10",
+ /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13",
+ "xer","ctr","cr0","cr1","cr2","cr3",
+ "cr4","cr5","cr6","cr7"
+ );
+
+ *res_r3 = args[0]; // r3 as stored by the asm above
+ *res_r4 = args[1]; // r4 as stored by the asm above
+}
+
+#else
+# error Unknown platform
+#endif
+
+
+SysRes VG_(do_syscall) ( UWord sysno, UWord a1, UWord a2, UWord a3,
+ UWord a4, UWord a5, UWord a6,
+ UWord a7, UWord a8 )
+{ /* Runs syscall 'sysno' through this platform's do_syscall_WRK and wraps the outcome in a SysRes. */
+#if defined(VGP_x86_linux)
+ UWord val = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6); /* a7 and a8 are not passed on here */
+ return VG_(mk_SysRes_x86_linux)( val );
+
+#elif defined(VGP_amd64_linux)
+ UWord val = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6); /* a7 and a8 are not passed on here */
+ return VG_(mk_SysRes_amd64_linux)( val );
+
+#elif defined(VGP_ppc32_linux)
+ ULong ret = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6); /* a7 and a8 are not passed on here */
+ UInt val = (UInt)(ret>>32); /* high word: the syscall return value */
+ UInt cr0so = (UInt)(ret); /* low word: cr0.so in its lowest bit */
+ return VG_(mk_SysRes_ppc32_linux)( val, cr0so );
+
+#elif defined(VGP_ppc64_linux)
+ ULong argblock[7]; /* in: sysno then a1..a6 (a7, a8 unused); out: [0] = result, [1] = cr0.so */
+ argblock[0] = sysno;
+ argblock[1] = a1;
+ argblock[2] = a2;
+ argblock[3] = a3;
+ argblock[4] = a4;
+ argblock[5] = a5;
+ argblock[6] = a6;
+ do_syscall_WRK( &argblock[0] ); /* overwrites argblock[0] and argblock[1] with the outcome */
+ return VG_(mk_SysRes_ppc64_linux)( argblock[0], argblock[1] );
+
+#elif defined(VGP_ppc32_aix5)
+ UWord res; /* receives r3 */
+ UWord err; /* receives r4 */
+ do_syscall_WRK( &res, &err,
+ sysno, a1, a2, a3, a4, a5, a6, a7, a8);
+ /* Try to set the error number to zero if the syscall hasn't
+ really failed. */
+ if (sysno == __NR_AIX5_kread
+ || sysno == __NR_AIX5_kwrite) {
+ if (res != (UWord)-1L) /* for these two, any result other than -1 is taken as success */
+ err = 0;
+ }
+ else if (sysno == __NR_AIX5_sigprocmask
+ || sysno == __NR_AIX5__sigpending) {
+ if (res == 0) /* for these two, a zero result is taken as success */
+ err = 0;
+ }
+
+ return VG_(mk_SysRes_ppc32_aix5)( res, err );
+
+#elif defined(VGP_ppc64_aix5)
+ UWord res; /* receives r3 */
+ UWord err; /* receives r4 */
+ do_syscall_WRK( &res, &err,
+ sysno, a1, a2, a3, a4, a5, a6, a7, a8);
+ /* Try to set the error number to zero if the syscall hasn't
+ really failed. */
+ if (sysno == __NR_AIX5_kread
+ || sysno == __NR_AIX5_kwrite) {
+ if (res != (UWord)-1L) /* for these two, any result other than -1 is taken as success */
+ err = 0;
+ }
+ else if (sysno == __NR_AIX5_sigprocmask
+ || sysno == __NR_AIX5__sigpending) {
+ if (res == 0) /* for these two, a zero result is taken as success */
+ err = 0;
+ }
+
+ return VG_(mk_SysRes_ppc64_aix5)( res, err );
+
+#else
+# error Unknown platform
+#endif
+}
+
+/* ---------------------------------------------------------------------
+ Names of errors.
+ ------------------------------------------------------------------ */
+
+/* Return a string which gives the name of an error value. Note,
+ unlike the standard C strerror fn, the returned string is not
+ malloc-allocated or writable -- treat it as a constant.
+ TODO: implement this properly. */
+
+const HChar* VG_(strerror) ( UWord errnum )
+{
+   /* Straight-line lookup of the known error numbers; anything not
+      listed here falls through to the catch-all text at the bottom. */
+   if (errnum == VKI_EPERM)       return "Operation not permitted";
+   if (errnum == VKI_ENOENT)      return "No such file or directory";
+   if (errnum == VKI_ESRCH)       return "No such process";
+   if (errnum == VKI_EINTR)       return "Interrupted system call";
+   if (errnum == VKI_EBADF)       return "Bad file number";
+   if (errnum == VKI_EAGAIN)      return "Try again";
+   if (errnum == VKI_ENOMEM)      return "Out of memory";
+   if (errnum == VKI_EACCES)      return "Permission denied";
+   if (errnum == VKI_EFAULT)      return "Bad address";
+   if (errnum == VKI_EEXIST)      return "File exists";
+   if (errnum == VKI_EINVAL)      return "Invalid argument";
+   if (errnum == VKI_EMFILE)      return "Too many open files";
+   if (errnum == VKI_ENOSYS)      return "Function not implemented";
+   if (errnum == VKI_EOVERFLOW)   return "Value too large for defined data type";
+   if (errnum == VKI_ERESTARTSYS) return "ERESTARTSYS";
+   return "VG_(strerror): unknown error";
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/