summaryrefslogtreecommitdiff
path: root/coregrind/m_syswrap/.svn/text-base/syswrap-main.c.svn-base
diff options
context:
space:
mode:
Diffstat (limited to 'coregrind/m_syswrap/.svn/text-base/syswrap-main.c.svn-base')
-rw-r--r-- coregrind/m_syswrap/.svn/text-base/syswrap-main.c.svn-base | 1485
1 files changed, 1485 insertions, 0 deletions
diff --git a/coregrind/m_syswrap/.svn/text-base/syswrap-main.c.svn-base b/coregrind/m_syswrap/.svn/text-base/syswrap-main.c.svn-base
new file mode 100644
index 0000000..63a03f6
--- /dev/null
+++ b/coregrind/m_syswrap/.svn/text-base/syswrap-main.c.svn-base
@@ -0,0 +1,1485 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Handle system calls. syswrap-main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2000-2009 Julian Seward
+ jseward@acm.org
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_guest_offsets.h"
+#include "pub_core_basics.h"
+#include "pub_core_aspacemgr.h"
+#include "pub_core_vki.h"
+#include "pub_core_vkiscnums.h"
+#include "pub_core_threadstate.h"
+#include "pub_core_libcbase.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_libcproc.h" // For VG_(getpid)()
+#include "pub_core_libcsignal.h"
+#include "pub_core_scheduler.h" // For VG_({acquire,release}_BigLock),
+ // and VG_(vg_yield)
+#include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
+#include "pub_core_tooliface.h"
+#include "pub_core_options.h"
+#include "pub_core_signals.h" // For VG_SIGVGKILL, VG_(poll_signals)
+#include "pub_core_syscall.h"
+#include "pub_core_machine.h"
+#include "pub_core_syswrap.h"
+
+#include "priv_types_n_macros.h"
+#include "priv_syswrap-main.h"
+
+
+/* Useful info which needs to be recorded somewhere:
+ Use of registers in syscalls is:
+
+ NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
+ LINUX:
+ x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM)
+ amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM)
+ ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1)
+ ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1)
+ AIX:
+ ppc32 r2 r3 r4 r5 r6 r7 r8 r9 r10 r3(res),r4(err)
+ ppc64 r2 r3 r4 r5 r6 r7 r8 r9 r10 r3(res),r4(err)
+*/
+
+/* This is the top level of the system-call handler module. All
+ system calls are channelled through here, doing two things:
+
+ * notify the tool of the events (mem/reg reads, writes) happening
+
+ * perform the syscall, usually by passing it along to the kernel
+ unmodified.
+
+ A magical piece of assembly code, do_syscall_for_client_WRK, in
+ syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
+ kernel, whilst having the simulator retain control.
+*/
+
+/* The main function is VG_(client_syscall). The simulation calls it
+ whenever a client thread wants to do a syscall. The following is a
+ sketch of what it does.
+
+ * Ensures the root thread's stack is suitably mapped. Tedious and
+ arcane. See big big comment in VG_(client_syscall).
+
+ * First, it rounds up the syscall number and args (which is a
+ platform dependent activity) and puts them in a struct ("args")
+ and also a copy in "orig_args".
+
+ The pre/post wrappers refer to these structs and so no longer
+ need magic macros to access any specific registers. This struct
+ is stored in thread-specific storage.
+
+
+ * The pre-wrapper is called, passing it a pointer to struct
+ "args".
+
+
+ * The pre-wrapper examines the args and pokes the tool
+ appropriately. It may modify the args; this is why "orig_args"
+ is also stored.
+
+ The pre-wrapper may choose to 'do' the syscall itself, and
+ concludes one of three outcomes:
+
+ Success(N) -- syscall is already complete, with success;
+ result is N
+
+ Fail(N) -- syscall is already complete, with failure;
+ error code is N
+
+ HandToKernel -- (the usual case): this needs to be given to
+ the kernel to be done, using the values in
+ the possibly-modified "args" struct.
+
+ In addition, the pre-wrapper may set some flags:
+
+ MayBlock -- only applicable when outcome==HandToKernel
+
+ PostOnFail -- only applicable when outcome==HandToKernel or Fail
+
+
+ * If the pre-outcome is HandToKernel, the syscall is duly handed
+ off to the kernel (perhaps involving some thread switchery, but
+ that's not important). This reduces the possible set of outcomes
+ to either Success(N) or Fail(N).
+
+
+ * The outcome (Success(N) or Fail(N)) is written back to the guest
+ register(s). This is platform specific:
+
+ x86: Success(N) ==> eax = N
+ Fail(N) ==> eax = -N
+
+ ditto amd64
+
+ ppc32: Success(N) ==> r3 = N, CR0.SO = 0
+ Fail(N) ==> r3 = N, CR0.SO = 1
+
+ * The post wrapper is called if:
+
+ - it exists, and
+ - outcome==Success or (outcome==Fail and PostOnFail is set)
+
+ The post wrapper is passed the adulterated syscall args (struct
+ "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
+
+ There are several other complications, primarily to do with
+ syscalls getting interrupted, explained in comments in the code.
+*/
+
+/* CAVEATS for writing wrappers. It is important to follow these!
+
+ The macros defined in priv_types_n_macros.h are designed to help
+ decouple the wrapper logic from the actual representation of
+ syscall args/results, since these wrappers are designed to work on
+ multiple platforms.
+
+ Sometimes a PRE wrapper will complete the syscall itself, without
+ handing it to the kernel. It will use one of SET_STATUS_Success,
+ SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
+ value. It is critical to appreciate that use of the macro does not
+ immediately cause the underlying guest state to be updated -- that
+ is done by the driver logic in this file, when the wrapper returns.
+
+ As a result, PRE wrappers of the following form will malfunction:
+
+ PRE(fooble)
+ {
+ ... do stuff ...
+ SET_STATUS_Somehow(...)
+
+ // do something that assumes guest state is up to date
+ }
+
+ In particular, direct or indirect calls to VG_(poll_signals) after
+ setting STATUS can cause the guest state to be read (in order to
+ build signal frames). Do not do this. If you want a signal poll
+ after the syscall goes through, do "*flags |= SfPollAfter" and the
+ driver logic will do it for you.
+
+ -----------
+
+ Another critical requirement following introduction of new address
+ space manager (JRS, 20050923):
+
+ In a situation where the mappedness of memory has changed, aspacem
+ should be notified BEFORE the tool. Hence the following is
+ correct:
+
+ Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
+ VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
+ if (d)
+ VG_(discard_translations)(s->start, s->end+1 - s->start);
+
+ whilst this is wrong:
+
+ VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
+ Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
+ if (d)
+ VG_(discard_translations)(s->start, s->end+1 - s->start);
+
+ The reason is that the tool may itself ask aspacem for more shadow
+ memory as a result of the VG_TRACK call. In such a situation it is
+ critical that aspacem's segment array is up to date -- hence the
+ need to notify aspacem first.
+
+ -----------
+
+ Also .. take care to call VG_(discard_translations) whenever
+ memory with execute permissions is unmapped.
+*/
+
+
+/* ---------------------------------------------------------------------
+ Do potentially blocking syscall for the client, and mess with
+ signal masks at the same time.
+ ------------------------------------------------------------------ */
+
+/* Perform a syscall on behalf of a client thread, using a specific
+ signal mask. On completion, the signal mask is set to restore_mask
+ (which presumably blocks almost everything). If a signal happens
+ during the syscall, the handler should call
+ VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
+ thread's context to do the right thing.
+
+ The _WRK function is handwritten assembly, implemented per-platform
+ in coregrind/m_syswrap/syscall-$PLAT.S. It has some very magic
+ properties. See comments at the top of
+ VG_(fixup_guest_state_after_syscall_interrupted) below for details.
+*/
+/* Returns 0 on success; a nonzero value encodes a sigprocmask
+   failure inside the assembly helper (asserted on by the caller,
+   do_syscall_for_client, below).  On AIX5 the helper additionally
+   needs the sigprocmask syscall number passed in explicitly. */
+extern
+UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
+ void* guest_state,
+ const vki_sigset_t *syscall_mask,
+ const vki_sigset_t *restore_mask,
+ Word nsigwords
+# if defined(VGO_aix5)
+ , Word __nr_sigprocmask
+# endif
+ );
+
+/* Perform 'syscallno' on behalf of the client thread 'tst', with the
+   kernel-visible signal mask set to 'syscall_mask' for the duration
+   of the syscall.  The previous mask is captured in 'saved' by the
+   assembly helper and restored afterwards.  Aborts if the helper's
+   internal sigprocmask operations fail. */
+static
+void do_syscall_for_client ( Int syscallno,
+ ThreadState* tst,
+ const vki_sigset_t* syscall_mask )
+{
+ vki_sigset_t saved;
+ UWord err
+ = ML_(do_syscall_for_client_WRK)(
+ syscallno, &tst->arch.vex,
+ syscall_mask, &saved, _VKI_NSIG_WORDS * sizeof(UWord)
+# if defined(VGO_aix5)
+ , __NR_rt_sigprocmask
+# endif
+ );
+ /* A nonzero 'err' means the helper's sigprocmask failed -- this is
+    an internal invariant violation, not a client-visible error. */
+ vg_assert2(
+ err == 0,
+ "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
+ (Int)(err & 0xFFF)
+ );
+}
+
+
+
+/* ---------------------------------------------------------------------
+ Impedance matchers and misc helpers
+ ------------------------------------------------------------------ */
+
+/* Structural equality on SyscallArgs: true iff the syscall number and
+   all eight (canonical) arguments are identical.  Used to assert that
+   pre-handlers which claim not to modify the args really didn't. */
+static
+Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
+{
+ return a1->sysno == a2->sysno
+ && a1->arg1 == a2->arg1
+ && a1->arg2 == a2->arg2
+ && a1->arg3 == a2->arg3
+ && a1->arg4 == a2->arg4
+ && a1->arg5 == a2->arg5
+ && a1->arg6 == a2->arg6
+ && a1->arg7 == a2->arg7
+ && a1->arg8 == a2->arg8;
+}
+
+/* Structural equality on SyscallStatus: same outcome kind ('what')
+   and same result/error payload. */
+static
+Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
+{
+ return s1->what == s2->what
+ && s1->sres.res == s2->sres.res
+ && s1->sres.err == s2->sres.err;
+}
+
+
+/* Convert between SysRes and SyscallStatus, to the extent possible. */
+
+/* Wrap a SysRes into a SyscallStatus marked as complete.  The reverse
+   direction is not always possible, hence no inverse function. */
+static
+SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
+{
+ SyscallStatus status;
+ status.what = SsComplete;
+ status.sres = res;
+ return status;
+}
+
+
+/* Impedance matchers. These convert syscall arg or result data from
+ the platform-specific in-guest-state format to the canonical
+ formats, and back. */
+
+/* Read the syscall number and arguments out of the platform-specific
+   guest register state into the canonical SyscallArgs form.  The
+   register assignments follow the per-platform table in the big
+   comment near the top of this file.  Platforms with fewer than 8
+   syscall args zero the unused slots. */
+static
+void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs* canonical,
+ /*IN*/ VexGuestArchState* gst_vanilla )
+{
+#if defined(VGP_x86_linux)
+ VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
+ canonical->sysno = gst->guest_EAX;
+ canonical->arg1 = gst->guest_EBX;
+ canonical->arg2 = gst->guest_ECX;
+ canonical->arg3 = gst->guest_EDX;
+ canonical->arg4 = gst->guest_ESI;
+ canonical->arg5 = gst->guest_EDI;
+ canonical->arg6 = gst->guest_EBP;
+ canonical->arg7 = 0;
+ canonical->arg8 = 0;
+
+#elif defined(VGP_amd64_linux)
+ VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
+ canonical->sysno = gst->guest_RAX;
+ canonical->arg1 = gst->guest_RDI;
+ canonical->arg2 = gst->guest_RSI;
+ canonical->arg3 = gst->guest_RDX;
+ canonical->arg4 = gst->guest_R10;
+ canonical->arg5 = gst->guest_R8;
+ canonical->arg6 = gst->guest_R9;
+ canonical->arg7 = 0;
+ canonical->arg8 = 0;
+
+
+#elif defined(VGP_ppc32_linux)
+ VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+ canonical->sysno = gst->guest_GPR0;
+ canonical->arg1 = gst->guest_GPR3;
+ canonical->arg2 = gst->guest_GPR4;
+ canonical->arg3 = gst->guest_GPR5;
+ canonical->arg4 = gst->guest_GPR6;
+ canonical->arg5 = gst->guest_GPR7;
+ canonical->arg6 = gst->guest_GPR8;
+ canonical->arg7 = 0;
+ canonical->arg8 = 0;
+
+
+#elif defined(VGP_ppc64_linux)
+ VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+ canonical->sysno = gst->guest_GPR0;
+ canonical->arg1 = gst->guest_GPR3;
+ canonical->arg2 = gst->guest_GPR4;
+ canonical->arg3 = gst->guest_GPR5;
+ canonical->arg4 = gst->guest_GPR6;
+ canonical->arg5 = gst->guest_GPR7;
+ canonical->arg6 = gst->guest_GPR8;
+ canonical->arg7 = 0;
+ canonical->arg8 = 0;
+
+
+#elif defined(VGP_ppc32_aix5)
+ /* AIX uses r2 for the syscall number and supports 8 args (r3-r10). */
+ VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+ canonical->sysno = gst->guest_GPR2;
+ canonical->arg1 = gst->guest_GPR3;
+ canonical->arg2 = gst->guest_GPR4;
+ canonical->arg3 = gst->guest_GPR5;
+ canonical->arg4 = gst->guest_GPR6;
+ canonical->arg5 = gst->guest_GPR7;
+ canonical->arg6 = gst->guest_GPR8;
+ canonical->arg7 = gst->guest_GPR9;
+ canonical->arg8 = gst->guest_GPR10;
+
+#elif defined(VGP_ppc64_aix5)
+ VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+ canonical->sysno = gst->guest_GPR2;
+ canonical->arg1 = gst->guest_GPR3;
+ canonical->arg2 = gst->guest_GPR4;
+ canonical->arg3 = gst->guest_GPR5;
+ canonical->arg4 = gst->guest_GPR6;
+ canonical->arg5 = gst->guest_GPR7;
+ canonical->arg6 = gst->guest_GPR8;
+ canonical->arg7 = gst->guest_GPR9;
+ canonical->arg8 = gst->guest_GPR10;
+
+#else
+# error "getSyscallArgsFromGuestState: unknown arch"
+#endif
+}
+
+/* Inverse of getSyscallArgsFromGuestState: write the canonical
+   syscall number and args back into the platform-specific guest
+   registers.  Linux variants carry only 6 args; the AIX variants
+   also write arg7/arg8 (r9/r10). */
+static
+void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs* canonical,
+ /*OUT*/VexGuestArchState* gst_vanilla )
+{
+#if defined(VGP_x86_linux)
+ VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
+ gst->guest_EAX = canonical->sysno;
+ gst->guest_EBX = canonical->arg1;
+ gst->guest_ECX = canonical->arg2;
+ gst->guest_EDX = canonical->arg3;
+ gst->guest_ESI = canonical->arg4;
+ gst->guest_EDI = canonical->arg5;
+ gst->guest_EBP = canonical->arg6;
+
+#elif defined(VGP_amd64_linux)
+ VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
+ gst->guest_RAX = canonical->sysno;
+ gst->guest_RDI = canonical->arg1;
+ gst->guest_RSI = canonical->arg2;
+ gst->guest_RDX = canonical->arg3;
+ gst->guest_R10 = canonical->arg4;
+ gst->guest_R8 = canonical->arg5;
+ gst->guest_R9 = canonical->arg6;
+
+#elif defined(VGP_ppc32_linux)
+ VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+ gst->guest_GPR0 = canonical->sysno;
+ gst->guest_GPR3 = canonical->arg1;
+ gst->guest_GPR4 = canonical->arg2;
+ gst->guest_GPR5 = canonical->arg3;
+ gst->guest_GPR6 = canonical->arg4;
+ gst->guest_GPR7 = canonical->arg5;
+ gst->guest_GPR8 = canonical->arg6;
+
+#elif defined(VGP_ppc64_linux)
+ VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+ gst->guest_GPR0 = canonical->sysno;
+ gst->guest_GPR3 = canonical->arg1;
+ gst->guest_GPR4 = canonical->arg2;
+ gst->guest_GPR5 = canonical->arg3;
+ gst->guest_GPR6 = canonical->arg4;
+ gst->guest_GPR7 = canonical->arg5;
+ gst->guest_GPR8 = canonical->arg6;
+
+#elif defined(VGP_ppc32_aix5)
+ VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+ gst->guest_GPR2 = canonical->sysno;
+ gst->guest_GPR3 = canonical->arg1;
+ gst->guest_GPR4 = canonical->arg2;
+ gst->guest_GPR5 = canonical->arg3;
+ gst->guest_GPR6 = canonical->arg4;
+ gst->guest_GPR7 = canonical->arg5;
+ gst->guest_GPR8 = canonical->arg6;
+ gst->guest_GPR9 = canonical->arg7;
+ gst->guest_GPR10 = canonical->arg8;
+
+#elif defined(VGP_ppc64_aix5)
+ VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+ gst->guest_GPR2 = canonical->sysno;
+ gst->guest_GPR3 = canonical->arg1;
+ gst->guest_GPR4 = canonical->arg2;
+ gst->guest_GPR5 = canonical->arg3;
+ gst->guest_GPR6 = canonical->arg4;
+ gst->guest_GPR7 = canonical->arg5;
+ gst->guest_GPR8 = canonical->arg6;
+ gst->guest_GPR9 = canonical->arg7;
+ gst->guest_GPR10 = canonical->arg8;
+
+#else
+# error "putSyscallArgsIntoGuestState: unknown arch"
+#endif
+}
+
+/* Read the syscall outcome out of the guest state into canonical
+   SyscallStatus form.  Always produces an SsComplete status, built
+   via the platform-specific VG_(mk_SysRes_*) constructors.  On ppc
+   Linux, success/failure is signalled by the CR0.SO bit (bit 28 of
+   the packed CR value), which is extracted alongside r3. */
+static
+void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus* canonical,
+ /*IN*/ VexGuestArchState* gst_vanilla )
+{
+# if defined(VGP_x86_linux)
+ VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
+ canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
+ canonical->what = SsComplete;
+
+# elif defined(VGP_amd64_linux)
+ VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
+ canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
+ canonical->what = SsComplete;
+
+# elif defined(VGP_ppc32_linux)
+ VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+ UInt cr = LibVEX_GuestPPC32_get_CR( gst );
+ UInt cr0so = (cr >> 28) & 1;
+ canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
+ canonical->what = SsComplete;
+
+# elif defined(VGP_ppc64_linux)
+ VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+ UInt cr = LibVEX_GuestPPC64_get_CR( gst );
+ UInt cr0so = (cr >> 28) & 1;
+ canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
+ canonical->what = SsComplete;
+
+# elif defined(VGP_ppc32_aix5)
+ /* AIX returns the result in r3 and the error code in r4. */
+ VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+ canonical->sres = VG_(mk_SysRes_ppc32_aix5)( gst->guest_GPR3,
+ gst->guest_GPR4 );
+ canonical->what = SsComplete;
+
+# elif defined(VGP_ppc64_aix5)
+ VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+ canonical->sres = VG_(mk_SysRes_ppc64_aix5)( gst->guest_GPR3,
+ gst->guest_GPR4 );
+ canonical->what = SsComplete;
+
+# else
+# error "getSyscallStatusFromGuestState: unknown arch"
+# endif
+}
+
+/* Write a completed syscall outcome (Success or Failure) back into
+   the platform-specific guest registers, using each kernel's result
+   encoding: negated errno in eax/rax on x86/amd64 Linux; r3 plus the
+   CR0.SO flag on ppc Linux; r3 (result) and r4 (error) on AIX.
+   The status must be SsComplete. */
+static
+void putSyscallStatusIntoGuestState ( /*IN*/ SyscallStatus* canonical,
+ /*OUT*/VexGuestArchState* gst_vanilla )
+{
+# if defined(VGP_x86_linux)
+ VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
+ vg_assert(canonical->what == SsComplete);
+ if (canonical->sres.isError) {
+ /* This isn't exactly right, in that really a Failure with res
+ not in the range 1 .. 4095 is unrepresentable in the
+ Linux-x86 scheme. Oh well. */
+ gst->guest_EAX = - (Int)canonical->sres.err;
+ } else {
+ gst->guest_EAX = canonical->sres.res;
+ }
+
+# elif defined(VGP_amd64_linux)
+ VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
+ vg_assert(canonical->what == SsComplete);
+ if (canonical->sres.isError) {
+ /* This isn't exactly right, in that really a Failure with res
+ not in the range 1 .. 4095 is unrepresentable in the
+ Linux-amd64 scheme. Oh well. */
+ gst->guest_RAX = - (Long)canonical->sres.err;
+ } else {
+ gst->guest_RAX = canonical->sres.res;
+ }
+
+# elif defined(VGP_ppc32_linux)
+ VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+ UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
+ vg_assert(canonical->what == SsComplete);
+ if (canonical->sres.isError) {
+ /* set CR0.SO */
+ LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
+ gst->guest_GPR3 = canonical->sres.err;
+ } else {
+ /* clear CR0.SO */
+ LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
+ gst->guest_GPR3 = canonical->sres.res;
+ }
+
+# elif defined(VGP_ppc64_linux)
+ VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+ UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
+ vg_assert(canonical->what == SsComplete);
+ if (canonical->sres.isError) {
+ /* set CR0.SO */
+ LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
+ gst->guest_GPR3 = canonical->sres.err;
+ } else {
+ /* clear CR0.SO */
+ LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
+ gst->guest_GPR3 = canonical->sres.res;
+ }
+
+# elif defined(VGP_ppc32_aix5)
+ VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+ vg_assert(canonical->what == SsComplete);
+ gst->guest_GPR3 = canonical->sres.res;
+ gst->guest_GPR4 = canonical->sres.err;
+
+# elif defined(VGP_ppc64_aix5)
+ VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+ vg_assert(canonical->what == SsComplete);
+ gst->guest_GPR3 = canonical->sres.res;
+ gst->guest_GPR4 = canonical->sres.err;
+
+# else
+# error "putSyscallStatusIntoGuestState: unknown arch"
+# endif
+}
+
+
+/* Tell me the offsets in the guest state of the syscall params, so
+ that the scalar argument checkers don't have to have this info
+ hardwired. */
+
+/* Fill in the guest-state byte offsets of the syscall number, each
+   argument, and the return value for this platform, so the scalar
+   argument checkers (PRE_REG_READ) need no hardwired register
+   knowledge.  Slots that don't exist on a platform (arg7/arg8 on
+   Linux) are set to -1. */
+static
+void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
+{
+#if defined(VGP_x86_linux)
+ layout->o_sysno = OFFSET_x86_EAX;
+ layout->o_arg1 = OFFSET_x86_EBX;
+ layout->o_arg2 = OFFSET_x86_ECX;
+ layout->o_arg3 = OFFSET_x86_EDX;
+ layout->o_arg4 = OFFSET_x86_ESI;
+ layout->o_arg5 = OFFSET_x86_EDI;
+ layout->o_arg6 = OFFSET_x86_EBP;
+ layout->o_arg7 = -1; /* impossible value */
+ layout->o_arg8 = -1; /* impossible value */
+ layout->o_retval = OFFSET_x86_EAX;
+
+#elif defined(VGP_amd64_linux)
+ layout->o_sysno = OFFSET_amd64_RAX;
+ layout->o_arg1 = OFFSET_amd64_RDI;
+ layout->o_arg2 = OFFSET_amd64_RSI;
+ layout->o_arg3 = OFFSET_amd64_RDX;
+ layout->o_arg4 = OFFSET_amd64_R10;
+ layout->o_arg5 = OFFSET_amd64_R8;
+ layout->o_arg6 = OFFSET_amd64_R9;
+ layout->o_arg7 = -1; /* impossible value */
+ layout->o_arg8 = -1; /* impossible value */
+ layout->o_retval = OFFSET_amd64_RAX;
+
+#elif defined(VGP_ppc32_linux)
+ layout->o_sysno = OFFSET_ppc32_GPR0;
+ layout->o_arg1 = OFFSET_ppc32_GPR3;
+ layout->o_arg2 = OFFSET_ppc32_GPR4;
+ layout->o_arg3 = OFFSET_ppc32_GPR5;
+ layout->o_arg4 = OFFSET_ppc32_GPR6;
+ layout->o_arg5 = OFFSET_ppc32_GPR7;
+ layout->o_arg6 = OFFSET_ppc32_GPR8;
+ layout->o_arg7 = -1; /* impossible value */
+ layout->o_arg8 = -1; /* impossible value */
+ layout->o_retval = OFFSET_ppc32_GPR3;
+
+#elif defined(VGP_ppc64_linux)
+ layout->o_sysno = OFFSET_ppc64_GPR0;
+ layout->o_arg1 = OFFSET_ppc64_GPR3;
+ layout->o_arg2 = OFFSET_ppc64_GPR4;
+ layout->o_arg3 = OFFSET_ppc64_GPR5;
+ layout->o_arg4 = OFFSET_ppc64_GPR6;
+ layout->o_arg5 = OFFSET_ppc64_GPR7;
+ layout->o_arg6 = OFFSET_ppc64_GPR8;
+ layout->o_arg7 = -1; /* impossible value */
+ layout->o_arg8 = -1; /* impossible value */
+ layout->o_retval = OFFSET_ppc64_GPR3;
+
+#elif defined(VGP_ppc32_aix5)
+ layout->o_sysno = OFFSET_ppc32_GPR2;
+ layout->o_arg1 = OFFSET_ppc32_GPR3;
+ layout->o_arg2 = OFFSET_ppc32_GPR4;
+ layout->o_arg3 = OFFSET_ppc32_GPR5;
+ layout->o_arg4 = OFFSET_ppc32_GPR6;
+ layout->o_arg5 = OFFSET_ppc32_GPR7;
+ layout->o_arg6 = OFFSET_ppc32_GPR8;
+ layout->o_arg7 = OFFSET_ppc32_GPR9;
+ layout->o_arg8 = OFFSET_ppc32_GPR10;
+ layout->o_retval = OFFSET_ppc32_GPR3;
+
+#elif defined(VGP_ppc64_aix5)
+ layout->o_sysno = OFFSET_ppc64_GPR2;
+ layout->o_arg1 = OFFSET_ppc64_GPR3;
+ layout->o_arg2 = OFFSET_ppc64_GPR4;
+ layout->o_arg3 = OFFSET_ppc64_GPR5;
+ layout->o_arg4 = OFFSET_ppc64_GPR6;
+ layout->o_arg5 = OFFSET_ppc64_GPR7;
+ layout->o_arg6 = OFFSET_ppc64_GPR8;
+ layout->o_arg7 = OFFSET_ppc64_GPR9;
+ layout->o_arg8 = OFFSET_ppc64_GPR10;
+ layout->o_retval = OFFSET_ppc64_GPR3;
+
+#else
+/* NOTE(review): message says "getSyscallLayout" but the function is
+   getSyscallArgLayout -- harmless, but inconsistent. */
+# error "getSyscallLayout: unknown arch"
+#endif
+}
+
+
+/* ---------------------------------------------------------------------
+ The main driver logic
+ ------------------------------------------------------------------ */
+
+/* Finding the handlers for a given syscall, or faking up one
+ when no handler is found. */
+
+/* Fallback pre-handler installed (via bad_sys) for syscalls with no
+   real wrapper.  Warns the user (with a stack trace at verbosity > 1),
+   points at the missing-syscall documentation, and fails the syscall
+   with ENOSYS without handing it to the kernel. */
+static
+void bad_before ( ThreadId tid,
+ SyscallArgLayout* layout,
+ /*MOD*/SyscallArgs* args,
+ /*OUT*/SyscallStatus* status,
+ /*OUT*/UWord* flags )
+{
+ VG_(message)
+ (Vg_DebugMsg,"WARNING: unhandled syscall: %llu", (ULong)args->sysno);
+# if defined(VGO_aix5)
+ VG_(message)
+ (Vg_DebugMsg," name of syscall: \"%s\"",
+ VG_(aix5_sysno_to_sysname)(args->sysno));
+# endif
+ if (VG_(clo_verbosity) > 1) {
+ VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
+ }
+ VG_(message)
+ (Vg_DebugMsg,"You may be able to write your own handler.");
+ VG_(message)
+ (Vg_DebugMsg,"Read the file README_MISSING_SYSCALL_OR_IOCTL.");
+ VG_(message)
+ (Vg_DebugMsg,"Nevertheless we consider this a bug. Please report");
+ VG_(message)
+ (Vg_DebugMsg,"it at http://valgrind.org/support/bug_reports.html.");
+
+ SET_STATUS_Failure(VKI_ENOSYS);
+}
+
+/* Dummy table entry for unknown syscalls: bad_before as the
+   pre-handler, no post-handler. */
+static SyscallTableEntry bad_sys =
+ { bad_before, NULL };
+
+/* Look up the wrapper table entry for 'syscallno'.  Never returns
+   NULL: if the platform table has no usable entry (out of range, or
+   no pre-handler), the dummy 'bad_sys' entry is returned instead,
+   which makes the syscall fail with ENOSYS. */
+static const SyscallTableEntry* get_syscall_entry ( UInt syscallno )
+{
+ const SyscallTableEntry* sys = NULL;
+
+# if defined(VGO_linux)
+ if (syscallno < ML_(syscall_table_size) &&
+ ML_(syscall_table)[syscallno].before != NULL)
+ sys = &ML_(syscall_table)[syscallno];
+
+# elif defined(VGP_ppc32_aix5)
+ sys = ML_(get_ppc32_aix5_syscall_entry) ( syscallno );
+
+# elif defined(VGP_ppc64_aix5)
+ sys = ML_(get_ppc64_aix5_syscall_entry) ( syscallno );
+
+# else
+# error Unknown OS
+# endif
+
+ return sys == NULL ? &bad_sys : sys;
+}
+
+
+/* Add and remove signals from mask so that we end up telling the
+   kernel the state we actually want rather than what the client
+   wants.  Concretely: SIGKILL and SIGSTOP cannot be blocked anyway,
+   and VG_SIGVGKILL must stay deliverable so Valgrind can always
+   terminate a thread. */
+static void sanitize_client_sigmask(vki_sigset_t *mask)
+{
+ VG_(sigdelset)(mask, VKI_SIGKILL);
+ VG_(sigdelset)(mask, VKI_SIGSTOP);
+ VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
+}
+
+/* Per-thread record of the syscall currently in progress:
+   'orig_args' is the args as fetched from the guest state,
+   'args' is the (possibly pre-handler-modified) working copy,
+   'status' is the outcome so far, and 'flags' holds the Sf* bits
+   set by the pre-handler.  Indexed by ThreadId. */
+typedef
+ struct {
+ SyscallArgs orig_args;
+ SyscallArgs args;
+ SyscallStatus status;
+ UWord flags;
+ }
+ SyscallInfo;
+
+SyscallInfo syscallInfo[VG_N_THREADS];
+
+
+/* The scheduler needs to be able to zero out these records after a
+   fork, hence this is exported from m_syswrap.  Zeroes the record
+   for 'tid' and marks it idle (no syscall in progress). */
+void VG_(clear_syscallInfo) ( Int tid )
+{
+ vg_assert(tid >= 0 && tid < VG_N_THREADS);
+ VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
+ syscallInfo[tid].status.what = SsIdle;
+}
+
+/* One-time lazy initialisation: reset every thread's syscallInfo
+   record to the idle state.  Subsequent calls are no-ops. */
+static void ensure_initialised ( void )
+{
+ Int i;
+ static Bool init_done = False;
+ if (init_done)
+ return;
+ init_done = True;
+ for (i = 0; i < VG_N_THREADS; i++) {
+ VG_(clear_syscallInfo)( i );
+ }
+}
+
+/* --- This is the main function of this file. --- */
+
+void VG_(client_syscall) ( ThreadId tid )
+{
+ UWord sysno;
+ ThreadState* tst;
+ const SyscallTableEntry* ent;
+ SyscallArgLayout layout;
+ SyscallInfo* sci;
+
+ ensure_initialised();
+
+ vg_assert(VG_(is_valid_tid)(tid));
+ vg_assert(tid >= 1 && tid < VG_N_THREADS);
+ vg_assert(VG_(is_running_thread)(tid));
+
+ tst = VG_(get_ThreadState)(tid);
+
+ /* BEGIN ensure root thread's stack is suitably mapped */
+ /* In some rare circumstances, we may do the syscall without the
+ bottom page of the stack being mapped, because the stack pointer
+ was moved down just a few instructions before the syscall
+ instruction, and there have been no memory references since
+ then, that would cause a call to VG_(extend_stack) to have
+ happened.
+
+ In native execution that's OK: the kernel automagically extends
+ the stack's mapped area down to cover the stack pointer (or sp -
+ redzone, really). In simulated normal execution that's OK too,
+ since any signals we get from accessing below the mapped area of
+ the (guest's) stack lead us to VG_(extend_stack), where we
+ simulate the kernel's stack extension logic. But that leaves
+ the problem of entering a syscall with the SP unmapped. Because
+ the kernel doesn't know that the segment immediately above SP is
+ supposed to be a grow-down segment, it causes the syscall to
+ fail, and thereby causes a divergence between native behaviour
+ (syscall succeeds) and simulated behaviour (syscall fails).
+
+ This is quite a rare failure mode. It has only been seen
+ affecting calls to sys_readlink on amd64-linux, and even then it
+ requires a certain code sequence around the syscall to trigger
+ it. Here is one:
+
+ extern int my_readlink ( const char* path );
+ asm(
+ ".text\n"
+ ".globl my_readlink\n"
+ "my_readlink:\n"
+ "\tsubq $0x1008,%rsp\n"
+ "\tmovq %rdi,%rdi\n" // path is in rdi
+ "\tmovq %rsp,%rsi\n" // &buf[0] -> rsi
+ "\tmovl $0x1000,%edx\n" // sizeof(buf) in rdx
+ "\tmovl $"__NR_READLINK",%eax\n" // syscall number
+ "\tsyscall\n"
+ "\taddq $0x1008,%rsp\n"
+ "\tret\n"
+ ".previous\n"
+ );
+
+ For more details, see bug #156404
+ (https://bugs.kde.org/show_bug.cgi?id=156404).
+
+ The fix is actually very simple. We simply need to call
+ VG_(extend_stack) for this thread, handing it the lowest
+ possible valid address for stack (sp - redzone), to ensure the
+ pages all the way down to that address, are mapped. Because
+ this is a potentially expensive and frequent operation, we
+ filter in two ways:
+
+ First, only the main thread (tid=1) has a growdown stack. So
+ ignore all others. It is conceivable, although highly unlikely,
+ that the main thread exits, and later another thread is
+ allocated tid=1, but that's harmless, I believe;
+ VG_(extend_stack) will do nothing when applied to a non-root
+ thread.
+
+ Secondly, first call VG_(am_find_nsegment) directly, to see if
+ the page holding (sp - redzone) is mapped correctly. If so, do
+ nothing. This is almost always the case. VG_(extend_stack)
+ calls VG_(am_find_nsegment) twice, so this optimisation -- and
+ that's all it is -- more or less halves the number of calls to
+ VG_(am_find_nsegment) required.
+
+ TODO: the test "seg->kind == SkAnonC" is really inadequate,
+ because although it tests whether the segment is mapped
+ _somehow_, it doesn't check that it has the right permissions
+ (r,w, maybe x) ? We could test that here, but it will also be
+ necessary to fix the corresponding test in VG_(extend_stack).
+
+ All this guff is of course Linux-specific. Hence the ifdef.
+ */
+# if defined(VGO_linux)
+ if (tid == 1/*ROOT THREAD*/) {
+ Addr stackMin = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
+ NSegment const* seg = VG_(am_find_nsegment)(stackMin);
+ if (seg && seg->kind == SkAnonC) {
+ /* stackMin is already mapped. Nothing to do. */
+ } else {
+ (void)VG_(extend_stack)( stackMin,
+ tst->client_stack_szB );
+ }
+ }
+# endif
+ /* END ensure root thread's stack is suitably mapped */
+
+ /* First off, get the syscall args and number. This is a
+ platform-dependent action. */
+
+ sci = & syscallInfo[tid];
+ vg_assert(sci->status.what == SsIdle);
+
+ getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex );
+
+ /* Copy .orig_args to .args. The pre-handler may modify .args, but
+ we want to keep the originals too, just in case. */
+ sci->args = sci->orig_args;
+
+ /* Save the syscall number in the thread state in case the syscall
+ is interrupted by a signal. */
+ sysno = sci->orig_args.sysno;
+
+ /* The default what-to-do-next thing is hand the syscall to the
+ kernel, so we pre-set that here. Set .sres to something
+ harmless looking (is irrelevant because .what is not
+ SsComplete.) */
+ sci->status.what = SsHandToKernel;
+ sci->status.sres = VG_(mk_SysRes_Error)(0);
+ sci->flags = 0;
+
+ /* Fetch the syscall's handlers. If no handlers exist for this
+ syscall, we are given dummy handlers which force an immediate
+ return with ENOSYS. */
+ ent = get_syscall_entry(sysno);
+
+ /* Fetch the layout information, which tells us where in the guest
+ state the syscall args reside. This is a platform-dependent
+ action. This info is needed so that the scalar syscall argument
+ checks (PRE_REG_READ calls) know which bits of the guest state
+ they need to inspect. */
+ getSyscallArgLayout( &layout );
+
+ /* Make sure the tmp signal mask matches the real signal mask;
+ sigsuspend may change this. */
+ vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
+
+ /* Right, we're finally ready to Party. Call the pre-handler and
+ see what we get back. At this point:
+
+ sci->status.what is Unset (we don't know yet).
+ sci->orig_args contains the original args.
+ sci->args is the same as sci->orig_args.
+ sci->flags is zero.
+ */
+
+ PRINT("SYSCALL[%d,%d](%3lld) ", VG_(getpid)(), tid, (ULong)sysno);
+
+ /* Do any pre-syscall actions */
+ if (VG_(needs).syscall_wrapper) {
+ VG_TDICT_CALL(tool_pre_syscall, tid, sysno);
+ }
+
+ vg_assert(ent);
+ vg_assert(ent->before);
+ (ent->before)( tid,
+ &layout,
+ &sci->args, &sci->status, &sci->flags );
+
+ /* The pre-handler may have modified:
+ sci->args
+ sci->status
+ sci->flags
+ All else remains unchanged.
+ Although the args may be modified, pre handlers are not allowed
+ to change the syscall number.
+ */
+ /* Now we proceed according to what the pre-handler decided. */
+ vg_assert(sci->status.what == SsHandToKernel
+ || sci->status.what == SsComplete);
+ vg_assert(sci->args.sysno == sci->orig_args.sysno);
+
+ if (sci->status.what == SsComplete && !sci->status.sres.isError) {
+ /* The pre-handler completed the syscall itself, declaring
+ success. */
+ if (sci->flags & SfNoWriteResult) {
+ PRINT(" --> [pre-success] NoWriteResult\n");
+ } else {
+ PRINT(" --> [pre-success] Success(0x%llx)\n",
+ (ULong)sci->status.sres.res );
+ }
+ /* In this case the allowable flags are to ask for a signal-poll
+ and/or a yield after the call. Changing the args isn't
+ allowed. */
+ vg_assert(0 == (sci->flags
+ & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
+ vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
+ }
+
+ else
+ if (sci->status.what == SsComplete && sci->status.sres.isError) {
+ /* The pre-handler decided to fail syscall itself. */
+ PRINT(" --> [pre-fail] Failure(0x%llx)\n", (ULong)sci->status.sres.err );
+ /* In this case, the pre-handler is also allowed to ask for the
+ post-handler to be run anyway. Changing the args is not
+ allowed. */
+ vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
+ vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
+ }
+
+ else
+ if (sci->status.what != SsHandToKernel) {
+ /* huh?! */
+ vg_assert(0);
+ }
+
+ else /* (sci->status.what == HandToKernel) */ {
+ /* Ok, this is the usual case -- and the complicated one. There
+ are two subcases: sync and async. async is the general case
+ and is to be used when there is any possibility that the
+ syscall might block [a fact that the pre-handler must tell us
+ via the sci->flags field.] Because the tidying-away /
+ context-switch overhead of the async case could be large, if
+ we are sure that the syscall will not block, we fast-track it
+ by doing it directly in this thread, which is a lot
+ simpler. */
+
+ /* Check that the given flags are allowable: MayBlock, PollAfter
+ and PostOnFail are ok. */
+ vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
+
+ if (sci->flags & SfMayBlock) {
+
+ /* Syscall may block, so run it asynchronously */
+ vki_sigset_t mask;
+
+ PRINT(" --> [async] ... \n");
+
+ mask = tst->sig_mask;
+ sanitize_client_sigmask(&mask);
+
+ /* Gack. More impedance matching. Copy the possibly
+ modified syscall args back into the guest state. */
+ vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
+ putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
+
+ /* Drop the lock */
+ VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
+
+ /* Do the call, which operates directly on the guest state,
+ not on our abstracted copies of the args/result. */
+ do_syscall_for_client(sysno, tst, &mask);
+
+ /* do_syscall_for_client may not return if the syscall was
+ interrupted by a signal. In that case, flow of control is
+ first to m_signals.async_sighandler, which calls
+ VG_(fixup_guest_state_after_syscall_interrupted), which
+ fixes up the guest state, and possibly calls
+ VG_(post_syscall). Once that's done, control drops back
+ to the scheduler. */
+
+ /* Reacquire the lock */
+ VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
+
+ /* Even more impedance matching. Extract the syscall status
+ from the guest state. */
+ getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
+ vg_assert(sci->status.what == SsComplete);
+
+ PRINT("SYSCALL[%d,%d](%3ld) ... [async] --> %s(0x%llx)\n",
+ VG_(getpid)(), tid, sysno,
+ sci->status.sres.isError ? "Failure" : "Success",
+ sci->status.sres.isError ? (ULong)sci->status.sres.err
+ : (ULong)sci->status.sres.res );
+
+ } else {
+
+ /* run the syscall directly */
+ /* The pre-handler may have modified the syscall args, but
+ since we're passing values in ->args directly to the
+ kernel, there's no point in flushing them back to the
+ guest state. Indeed doing so could be construed as
+ incorrect. */
+ SysRes sres
+ = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
+ sci->args.arg3, sci->args.arg4,
+ sci->args.arg5, sci->args.arg6,
+ sci->args.arg7, sci->args.arg8 );
+ sci->status = convert_SysRes_to_SyscallStatus(sres);
+
+ PRINT("[sync] --> %s(0x%llx)\n",
+ sci->status.sres.isError ? "Failure" : "Success",
+ sci->status.sres.isError ? (ULong)sci->status.sres.err
+ : (ULong)sci->status.sres.res );
+ }
+ }
+
+ vg_assert(sci->status.what == SsComplete);
+
+ vg_assert(VG_(is_running_thread)(tid));
+
+ /* Dump the syscall result back in the guest state. This is
+ a platform-specific action. */
+ if (!(sci->flags & SfNoWriteResult))
+ putSyscallStatusIntoGuestState( &sci->status, &tst->arch.vex );
+
+ /* Situation now:
+ - the guest state is now correctly modified following the syscall
+ - modified args, original args and syscall status are still
+ available in the syscallInfo[] entry for this syscall.
+
+ Now go on to do the post-syscall actions (read on down ..)
+ */
+ VG_(post_syscall)(tid);
+}
+
+
+/* Perform post syscall actions. The expected state on entry is
+ precisely as at the end of VG_(client_syscall), that is:
+
+ - guest state up to date following the syscall
+ - modified args, original args and syscall status are still
+ available in the syscallInfo[] entry for this syscall.
+ - syscall status matches what's in the guest state.
+
+ There are two ways to get here: the normal way -- being called by
+ VG_(client_syscall), and the unusual way, from
+ VG_(fixup_guest_state_after_syscall_interrupted).
+
+ On exit the syscall is finished: sci->status.what is SsIdle, the
+ (possibly post-handler-modified) result has been written back to
+ the guest state unless SfNoWriteResult was set, and any
+ signal-poll (SfPollAfter) or yield (SfYieldAfter) requested by
+ the wrappers has been carried out.
+*/
+void VG_(post_syscall) (ThreadId tid)
+{
+ SyscallArgLayout layout;
+ SyscallInfo* sci;
+ const SyscallTableEntry* ent;
+ /* scratch copy of the guest-state status, used only for the
+ sanity cross-check against sci->status below */
+ SyscallStatus test_status;
+ ThreadState* tst;
+ UWord sysno;
+
+ /* Preliminaries */
+ vg_assert(VG_(is_valid_tid)(tid));
+ vg_assert(tid >= 1 && tid < VG_N_THREADS);
+ vg_assert(VG_(is_running_thread)(tid));
+
+ tst = VG_(get_ThreadState)(tid);
+ sci = & syscallInfo[tid];
+
+ /* m_signals.sigvgkill_handler might call here even when not in
+ a syscall. In that case there is nothing to post-process;
+ just park the status back at SsIdle and leave. */
+ if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
+ sci->status.what = SsIdle;
+ return;
+ }
+
+ /* Validate current syscallInfo entry. In particular we require
+ that the current .status matches what's actually in the guest
+ state. At least in the normal case where we have actually
+ previously written the result into the guest state. */
+ vg_assert(sci->status.what == SsComplete);
+
+ getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
+ if (!(sci->flags & SfNoWriteResult))
+ vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
+ /* Ok, looks sane */
+
+ /* Get the system call number. Because the pre-handler isn't
+ allowed to mess with it, it should be the same for both the
+ original and potentially-modified args. */
+ vg_assert(sci->args.sysno == sci->orig_args.sysno);
+ sysno = sci->args.sysno;
+ ent = get_syscall_entry(sysno);
+
+ /* We need the arg layout .. sigh */
+ getSyscallArgLayout( &layout );
+
+ /* Tell the tool that the assignment has occurred, so it can update
+ shadow regs as necessary. */
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, layout.o_retval,
+ sizeof(UWord) );
+
+ /* pre: status == Complete (asserted above) */
+ /* Consider either success or failure. Now run the post handler if:
+ - it exists, and
+ - Success or (Failure and PostOnFail is set)
+ */
+ if (ent->after
+ && ((!sci->status.sres.isError)
+ || (sci->status.sres.isError
+ && (sci->flags & SfPostOnFail) ))) {
+
+ (ent->after)( tid, &sci->args, &sci->status );
+ }
+
+ /* Because the post handler might have changed the status (eg, the
+ post-handler for sys_open can change the result from success to
+ failure if the kernel supplied a fd that it doesn't like), once
+ again dump the syscall result back in the guest state.*/
+ if (!(sci->flags & SfNoWriteResult))
+ putSyscallStatusIntoGuestState( &sci->status, &tst->arch.vex );
+
+ /* Do any post-syscall actions required by the tool. */
+ if (VG_(needs).syscall_wrapper)
+ VG_TDICT_CALL(tool_post_syscall, tid, sysno, sci->status.sres);
+
+ /* The syscall is done. Mark the slot idle so a re-entry (e.g. via
+ sigvgkill_handler) takes the early-return path above. */
+ vg_assert(sci->status.what == SsComplete);
+ sci->status.what = SsIdle;
+
+ /* The pre/post wrappers may have concluded that pending signals
+ might have been created, and will have set SfPollAfter to
+ request a poll for them once the syscall is done. */
+ if (sci->flags & SfPollAfter)
+ VG_(poll_signals)(tid);
+
+ /* Similarly, the wrappers might have asked for a yield
+ afterwards. */
+ if (sci->flags & SfYieldAfter)
+ VG_(vg_yield)();
+}
+
+
+/* ---------------------------------------------------------------------
+ Dealing with syscalls which get interrupted by a signal:
+ VG_(fixup_guest_state_after_syscall_interrupted)
+ ------------------------------------------------------------------ */
+
+/* Syscalls done on behalf of the client are finally handed off to the
+ kernel in VG_(client_syscall) above, either by calling
+ do_syscall_for_client (the async case), or by calling
+ VG_(do_syscall6) (the sync case).
+
+ If the syscall is not interrupted by a signal (it may block and
+ later unblock, but that's irrelevant here) then those functions
+ eventually return and so control is passed to VG_(post_syscall).
+ NB: not sure if the sync case can actually get interrupted, as it
+ operates with all signals masked.
+
+ However, the syscall may get interrupted by an async-signal. In
+ that case do_syscall_for_client/VG_(do_syscall6) do not
+ return. Instead we wind up in m_signals.async_sighandler. We need
+ to fix up the guest state to make it look like the syscall was
+ interrupted for guest. So async_sighandler calls here, and this
+ does the fixup. Note that from here we wind up calling
+ VG_(post_syscall) too.
+*/
+
+
+/* These are addresses within ML_(do_syscall_for_client_WRK). See
+ syscall-$PLAT.S for details.
+*/
+extern const Addr ML_(blksys_setup);
+extern const Addr ML_(blksys_restart);
+extern const Addr ML_(blksys_complete);
+extern const Addr ML_(blksys_committed);
+extern const Addr ML_(blksys_finished);
+
+
+/* Back up guest state to restart a system call. Each platform case
+ rewinds the guest program counter by the size of that platform's
+ syscall instruction (or, on AIX, restores the saved CIA_AT_SC
+ pseudo-register), so that resuming the thread re-executes the
+ syscall. It then checks that the bytes at the new PC really are
+ that platform's syscall instruction, and asserts if not. */
+
+void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
+{
+#if defined(VGP_x86_linux)
+ arch->vex.guest_EIP -= 2; // sizeof(int $0x80)
+
+ /* Make sure our caller is actually sane, and we're really backing
+ back over a syscall.
+
+ int $0x80 == CD 80
+ */
+ {
+ UChar *p = (UChar *)arch->vex.guest_EIP;
+
+ if (p[0] != 0xcd || p[1] != 0x80)
+ VG_(message)(Vg_DebugMsg,
+ "?! restarting over syscall at %#x %02x %02x\n",
+ arch->vex.guest_EIP, p[0], p[1]);
+
+ vg_assert(p[0] == 0xcd && p[1] == 0x80);
+ }
+
+#elif defined(VGP_amd64_linux)
+ arch->vex.guest_RIP -= 2; // sizeof(syscall)
+
+ /* Make sure our caller is actually sane, and we're really backing
+ back over a syscall.
+
+ syscall == 0F 05
+ */
+ {
+ UChar *p = (UChar *)arch->vex.guest_RIP;
+
+ if (p[0] != 0x0F || p[1] != 0x05)
+ VG_(message)(Vg_DebugMsg,
+ "?! restarting over syscall at %#llx %02x %02x\n",
+ arch->vex.guest_RIP, p[0], p[1]);
+
+ vg_assert(p[0] == 0x0F && p[1] == 0x05);
+ }
+
+#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
+ arch->vex.guest_CIA -= 4; // sizeof(ppc32 instr)
+
+ /* Make sure our caller is actually sane, and we're really backing
+ back over a syscall.
+
+ sc == 44 00 00 02
+ */
+ {
+ UChar *p = (UChar *)arch->vex.guest_CIA;
+
+ if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
+ VG_(message)(Vg_DebugMsg,
+ "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
+ arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);
+
+ vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
+ }
+
+#elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
+ /* Hmm. This is problematic, because on AIX the kernel resumes
+ after a syscall at LR, not at the insn following SC. Hence
+ there is no obvious way to figure out where the SC is. Current
+ solution is to have a pseudo-register in the guest state,
+ CIA_AT_SC, which holds the address of the most recent SC
+ executed. Backing up to that syscall then simply involves
+ copying that value back into CIA (the program counter). */
+ arch->vex.guest_CIA = arch->vex.guest_CIA_AT_SC;
+
+ /* Make sure our caller is actually sane, and we're really backing
+ back over a syscall.
+
+ sc == 44 00 00 02
+ */
+ {
+ UChar *p = (UChar *)arch->vex.guest_CIA;
+
+ if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
+ VG_(message)(Vg_DebugMsg,
+ "?! restarting over syscall at %#lx %02x %02x %02x %02x\n",
+ (UWord)arch->vex.guest_CIA, p[0], p[1], p[2], p[3]);
+
+ vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
+ }
+
+#else
+# error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
+#endif
+}
+
+/*
+ Fix up the guest state when a syscall is interrupted by a signal
+ and so has been forced to return 'sysret'.
+
+ To do this, we determine the precise state of the syscall by
+ looking at the (real) IP at the time the signal happened. The
+ syscall sequence looks like:
+
+ 1. unblock signals
+ 2. perform syscall
+ 3. save result to guest state (EAX, RAX, R3+CR0.SO)
+ 4. re-block signals
+
+ If a signal
+ happens at Then Why?
+ [1-2) restart nothing has happened (restart syscall)
+ [2] restart syscall hasn't started, or kernel wants to restart
+ [2-3) save syscall complete, but results not saved
+ [3-4) syscall complete, results saved
+
+ The boundaries of those intervals are the ML_(blksys_*) labels
+ exported from syscall-$PLAT.S (see the extern decls above).
+
+ Sometimes we never want to restart an interrupted syscall (because
+ sigaction says not to), so we only restart if "restart" is True.
+
+ This will also call VG_(post_syscall) if the syscall has actually
+ completed (either because it was interrupted, or because it
+ actually finished). It will not call VG_(post_syscall) if the
+ syscall is set up for restart, which means that the pre-wrapper may
+ get called multiple times.
+*/
+
+void
+VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
+ Addr ip,
+ UWord sysnum,
+ SysRes sres,
+ Bool restart)
+{
+ /* Note that the sysnum arg seems to contain not-dependable-on info
+ (I think it depends on the state the real syscall was in at
+ interrupt) and so is ignored, apart from in the following
+ printf. */
+
+ static const Bool debug = False;
+
+ ThreadState* tst;
+ SyscallStatus canonical;
+ ThreadArchState* th_regs;
+ SyscallInfo* sci;
+
+ if (debug)
+ VG_(printf)( "interrupted_syscall %d: tid=%d, IP=0x%llx, "
+ "restart=%s, sysret.isError=%s, sysret.val=%lld\n",
+ (Int)sysnum,
+ (Int)tid,
+ (ULong)ip,
+ restart ? "True" : "False",
+ sres.isError ? "True" : "False",
+ (Long)(Word)(sres.isError ? sres.err : sres.res) );
+
+ vg_assert(VG_(is_valid_tid)(tid));
+ vg_assert(tid >= 1 && tid < VG_N_THREADS);
+ vg_assert(VG_(is_running_thread)(tid));
+
+ tst = VG_(get_ThreadState)(tid);
+ th_regs = &tst->arch;
+ sci = & syscallInfo[tid];
+
+ /* Figure out what the state of the syscall was by examining the
+ (real) IP at the time of the signal, and act accordingly. */
+
+ if (ip < ML_(blksys_setup) || ip >= ML_(blksys_finished)) {
+ /* NOTE(review): this printf is not gated on 'debug', unlike the
+ other diagnostics in this function -- presumably deliberate
+ since this case is suspicious; confirm. */
+ VG_(printf)(" not in syscall (%#lx - %#lx)\n",
+ ML_(blksys_setup), ML_(blksys_finished));
+ /* Looks like we weren't in a syscall at all. Hmm. */
+ vg_assert(sci->status.what != SsIdle);
+ return;
+ }
+
+ /* We should not be here unless this thread had first started up
+ the machinery for a syscall by calling VG_(client_syscall).
+ Hence: */
+ vg_assert(sci->status.what != SsIdle);
+
+ if (ip >= ML_(blksys_setup) && ip < ML_(blksys_restart)) {
+ /* syscall hasn't even started; go around again */
+ if (debug)
+ VG_(printf)(" not started: restart\n");
+ vg_assert(sci->status.what == SsHandToKernel);
+ ML_(fixup_guest_state_to_restart_syscall)(th_regs);
+ }
+
+ else
+ if (ip == ML_(blksys_restart)) {
+ /* We're either about to run the syscall, or it was interrupted
+ and the kernel restarted it. Restart if asked, otherwise
+ EINTR it. */
+ if (restart)
+ ML_(fixup_guest_state_to_restart_syscall)(th_regs);
+ else {
+ /* Fabricate an EINTR failure and deliver it to the guest as
+ if the kernel had returned it. */
+ canonical = convert_SysRes_to_SyscallStatus(
+ VG_(mk_SysRes_Error)( VKI_EINTR )
+ );
+ if (!(sci->flags & SfNoWriteResult))
+ putSyscallStatusIntoGuestState( &canonical, &th_regs->vex );
+ sci->status = canonical;
+ VG_(post_syscall)(tid);
+ }
+ }
+
+ else
+ if (ip >= ML_(blksys_complete) && ip < ML_(blksys_committed)) {
+ /* Syscall complete, but result hasn't been written back yet.
+ Write the SysRes we were supplied with back to the guest
+ state. */
+ if (debug)
+ VG_(printf)(" completed\n");
+ canonical = convert_SysRes_to_SyscallStatus( sres );
+ if (!(sci->flags & SfNoWriteResult))
+ putSyscallStatusIntoGuestState( &canonical, &th_regs->vex );
+ sci->status = canonical;
+ VG_(post_syscall)(tid);
+ }
+
+ else
+ if (ip >= ML_(blksys_committed) && ip < ML_(blksys_finished)) {
+ /* Result committed, but the signal mask has not been restored;
+ we expect our caller (the signal handler) will have fixed
+ this up. */
+ if (debug)
+ VG_(printf)(" all done\n");
+ VG_(post_syscall)(tid);
+ }
+
+ else
+ VG_(core_panic)("?? strange syscall interrupt state?");
+
+ /* In all cases, the syscall is now finished (even if we called
+ ML_(fixup_guest_state_to_restart_syscall), since that just
+ re-positions the guest's IP for another go at it). So we need
+ to record that fact. */
+ sci->status.what = SsIdle;
+}
+
+
+/* ---------------------------------------------------------------------
+ A place to store the where-to-call-when-really-done pointer
+ ------------------------------------------------------------------ */
+
+// When the final thread is done, where shall I call to shutdown the
+// system cleanly? Is set once at startup (in m_main) and never
+// changes after that. Is basically a pointer to the exit
+// continuation. This is all just a nasty hack to avoid calling
+// directly from m_syswrap to m_main at exit, since that would cause
+// m_main to become part of a module cycle, which is silly.
+// As the name says, the pointed-to function is expected never to
+// return. Starts out NULL until m_main installs it.
+void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
+ (ThreadId,VgSchedReturnCode)
+ = NULL;
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/