1 files changed, 1485 insertions, 0 deletions
diff --git a/coregrind/m_syswrap/.svn/text-base/syswrap-main.c.svn-base b/coregrind/m_syswrap/.svn/text-base/syswrap-main.c.svn-base
new file mode 100644
index 0000000..63a03f6
--- /dev/null
+++ b/coregrind/m_syswrap/.svn/text-base/syswrap-main.c.svn-base
@@ -0,0 +1,1485 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Handle system calls.                          syswrap-main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2000-2009 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_guest_offsets.h"
+#include "pub_core_basics.h"
+#include "pub_core_aspacemgr.h"
+#include "pub_core_vki.h"
+#include "pub_core_vkiscnums.h"
+#include "pub_core_threadstate.h"
+#include "pub_core_libcbase.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_libcproc.h"      // For VG_(getpid)()
+#include "pub_core_libcsignal.h"
+#include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
+                                    //   and VG_(vg_yield)
+#include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
+#include "pub_core_tooliface.h"
+#include "pub_core_options.h"
+#include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
+#include "pub_core_syscall.h"
+#include "pub_core_machine.h"
+#include "pub_core_syswrap.h"
+
+#include "priv_types_n_macros.h"
+#include "priv_syswrap-main.h"
+
+
+/* Useful info which needs to be recorded somewhere:
+   Use of registers in syscalls is:
+
+          NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
+   LINUX:
+   x86    eax ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
+   amd64  rax rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
+   ppc32  r0  r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
+   ppc64  r0  r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
+   AIX:
+   ppc32  r2  r3   r4   r5   r6   r7   r8   r9   r10  r3(res),r4(err)
+   ppc64  r2  r3   r4   r5   r6   r7   r8   r9   r10  r3(res),r4(err)
+*/
+
+/* This is the top level of the system-call handler module.  All
+   system calls are channelled through here, doing two things:
+
+   * notify the tool of the events (mem/reg reads, writes) happening
+
+   * perform the syscall, usually by passing it along to the kernel
+     unmodified.
+
+   A magical piece of assembly code, do_syscall_for_client_WRK, in
+   syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
+   kernel, whilst having the simulator retain control.
+*/
+
+/* The main function is VG_(client_syscall).  The simulation calls it
+   whenever a client thread wants to do a syscall.  The following is a
+   sketch of what it does.
+
+   * Ensures the root thread's stack is suitably mapped.  Tedious and
+     arcane.  See big big comment in VG_(client_syscall).
+
+   * Next, it gathers the syscall number and args (which is a
+     platform-dependent activity) and puts them in a struct ("args"),
+     keeping an unmodified copy in "orig_args".
+
+     The pre/post wrappers refer to these structs and so no longer
+     need magic macros to access any specific registers.  This struct
+     is stored in thread-specific storage.
+
+
+   * The pre-wrapper is called, passing it a pointer to struct
+     "args".
+
+
+   * The pre-wrapper examines the args and pokes the tool
+     appropriately.  It may modify the args; this is why "orig_args"
+     is also stored.
+
+     The pre-wrapper may choose to 'do' the syscall itself, and
+     finishes by reporting one of three outcomes:
+
+       Success(N)    -- syscall is already complete, with success;
+                        result is N
+
+       Fail(N)       -- syscall is already complete, with failure;
+                        error code is N
+
+       HandToKernel  -- (the usual case): this needs to be given to
+                        the kernel to be done, using the values in
+                        the possibly-modified "args" struct.
+
+     In addition, the pre-wrapper may set some flags:
+
+       MayBlock   -- only applicable when outcome==HandToKernel
+
+       PostOnFail -- only applicable when outcome==HandToKernel or Fail
+
+
+   * If the pre-outcome is HandToKernel, the syscall is duly handed
+     off to the kernel (perhaps involving some thread switchery, but
+     that's not important).  This reduces the possible set of outcomes
+     to either Success(N) or Fail(N).
+
+
+   * The outcome (Success(N) or Fail(N)) is written back to the guest
+     register(s).  This is platform specific:
+
+     x86:    Success(N) ==>  eax = N
+             Fail(N)    ==>  eax = -N
+
+     ditto amd64
+
+     ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
+             Fail(N) ==>     r3 = N, CR0.SO = 1
+
+   * The post wrapper is called if:
+
+     - it exists, and
+     - outcome==Success or (outcome==Fail and PostOnFail is set)
+
+     The post wrapper is passed the adulterated syscall args (struct
+     "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
+
+   There are several other complications, primarily to do with
+   syscalls getting interrupted, explained in comments in the code.
+*/
+
+/* CAVEATS for writing wrappers.  It is important to follow these!
+
+   The macros defined in priv_types_n_macros.h are designed to help
+   decouple the wrapper logic from the actual representation of
+   syscall args/results, since these wrappers are designed to work on
+   multiple platforms.
+
+   Sometimes a PRE wrapper will complete the syscall itself, without
+   handing it to the kernel.  It will use one of SET_STATUS_Success,
+   SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
+   value.  It is critical to appreciate that use of the macro does not
+   immediately cause the underlying guest state to be updated -- that
+   is done by the driver logic in this file, when the wrapper returns.
+
+   As a result, PRE wrappers of the following form will malfunction:
+
+   PRE(fooble) 
+   {
+      ... do stuff ...
+      SET_STATUS_Somehow(...)
+
+      // do something that assumes guest state is up to date
+   }
+
+   In particular, direct or indirect calls to VG_(poll_signals) after
+   setting STATUS can cause the guest state to be read (in order to
+   build signal frames).  Do not do this.  If you want a signal poll
+   after the syscall goes through, do "*flags |= SfPollAfter" and the
+   driver logic will do it for you.
+
+   -----------
+
+   Another critical requirement following introduction of new address
+   space manager (JRS, 20050923):
+
+   In a situation where the mappedness of memory has changed, aspacem
+   should be notified BEFORE the tool.  Hence the following is
+   correct:
+
+      Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
+      VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
+      if (d)
+         VG_(discard_translations)(s->start, s->end+1 - s->start);
+
+   whilst this is wrong:
+
+      VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
+      Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
+      if (d)
+         VG_(discard_translations)(s->start, s->end+1 - s->start);
+
+   The reason is that the tool may itself ask aspacem for more shadow
+   memory as a result of the VG_TRACK call.  In such a situation it is
+   critical that aspacem's segment array is up to date -- hence the
+   need to notify aspacem first.
+
+   -----------
+
+   Also .. take care to call VG_(discard_translations) whenever
+   memory with execute permissions is unmapped.
+*/
+
+
+/* ---------------------------------------------------------------------
+   Do potentially blocking syscall for the client, and mess with 
+   signal masks at the same time. 
+   ------------------------------------------------------------------ */
+
+/* Perform a syscall on behalf of a client thread, using a specific
+   signal mask.  On completion, the signal mask is set to restore_mask
+   (which presumably blocks almost everything).  If a signal happens
+   during the syscall, the handler should call
+   VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
+   thread's context to do the right thing.
+
+   The _WRK function is handwritten assembly, implemented per-platform
+   in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
+   properties.  See comments at the top of
+   VG_(fixup_guest_state_after_syscall_interrupted) below for details.
+*/
+extern
+UWord ML_(do_syscall_for_client_WRK)( Word syscallno, /* syscall number */
+                                      void* guest_state, /* caller passes &tst->arch.vex */
+                                      const vki_sigset_t *syscall_mask,
+                                      const vki_sigset_t *restore_mask,
+                                      Word nsigwords /* caller passes a byte count */
+#                                     if defined(VGO_aix5)
+                                      , Word __nr_sigprocmask
+#                                     endif
+                                    ); /* caller asserts that the result is 0 */
+
+static
+void do_syscall_for_client ( Int syscallno,
+                             ThreadState* tst,
+                             const vki_sigset_t* syscall_mask )
+{
+   /* Run the syscall through the assembly helper with syscall_mask
+      supplied; a nonzero return from the helper is treated as fatal. */
+   vki_sigset_t restore;
+   UWord        err;
+   err = ML_(do_syscall_for_client_WRK)(
+            syscallno, &tst->arch.vex, syscall_mask, &restore,
+            _VKI_NSIG_WORDS * sizeof(UWord)
+#           if defined(VGO_aix5)
+            , __NR_rt_sigprocmask
+#           endif
+         );
+   vg_assert2( err == 0,
+               "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
+               (Int)(err & 0xFFF) );
+}
+
+
+
+/* ---------------------------------------------------------------------
+   Impedance matchers and misc helpers
+   ------------------------------------------------------------------ */
+
+static
+Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
+{
+   /* True iff the syscall number and all eight argument slots of
+      a1 and a2 are equal.  Bail out at the first field that
+      differs. */
+   if (a1->sysno != a2->sysno) return False;
+   if (a1->arg1 != a2->arg1 || a1->arg2 != a2->arg2) return False;
+   if (a1->arg3 != a2->arg3 || a1->arg4 != a2->arg4) return False;
+   if (a1->arg5 != a2->arg5 || a1->arg6 != a2->arg6) return False;
+   if (a1->arg7 != a2->arg7 || a1->arg8 != a2->arg8) return False;
+   return True;
+}
+
+static
+Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
+{
+   /* Same state, and same result and error values? */
+   if (s1->what != s2->what) return False;
+   return s1->sres.res == s2->sres.res && s1->sres.err == s2->sres.err;
+}
+
+
+/* Convert between SysRes and SyscallStatus, to the extent possible. */
+
+static
+SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
+{
+   SyscallStatus wrapped;
+   wrapped.sres = res;         /* carry the result over unchanged */
+   wrapped.what = SsComplete;  /* and mark the status as complete */
+   return wrapped;
+}
+
+
+/* Impedance matchers.  These convert syscall arg or result data from
+   the platform-specific in-guest-state format to the canonical
+   formats, and back. */
+
+static 
+void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
+                                    /*IN*/ VexGuestArchState* gst_vanilla )
+{  /* Read the syscall number and args out of the guest registers. */
+#if defined(VGP_x86_linux)
+   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
+   canonical->sysno = gst->guest_EAX;
+   canonical->arg1  = gst->guest_EBX;
+   canonical->arg2  = gst->guest_ECX;
+   canonical->arg3  = gst->guest_EDX;
+   canonical->arg4  = gst->guest_ESI;
+   canonical->arg5  = gst->guest_EDI;
+   canonical->arg6  = gst->guest_EBP;
+   canonical->arg7  = 0;  /* no register carries arg7/arg8 here */
+   canonical->arg8  = 0;
+
+#elif defined(VGP_amd64_linux)
+   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
+   canonical->sysno = gst->guest_RAX;
+   canonical->arg1  = gst->guest_RDI;
+   canonical->arg2  = gst->guest_RSI;
+   canonical->arg3  = gst->guest_RDX;
+   canonical->arg4  = gst->guest_R10;
+   canonical->arg5  = gst->guest_R8;
+   canonical->arg6  = gst->guest_R9;
+   canonical->arg7  = 0;  /* no register carries arg7/arg8 here */
+   canonical->arg8  = 0;
+
+
+#elif defined(VGP_ppc32_linux)
+   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+   canonical->sysno = gst->guest_GPR0;
+   canonical->arg1  = gst->guest_GPR3;
+   canonical->arg2  = gst->guest_GPR4;
+   canonical->arg3  = gst->guest_GPR5;
+   canonical->arg4  = gst->guest_GPR6;
+   canonical->arg5  = gst->guest_GPR7;
+   canonical->arg6  = gst->guest_GPR8;
+   canonical->arg7  = 0;  /* no register carries arg7/arg8 here */
+   canonical->arg8  = 0;
+
+
+#elif defined(VGP_ppc64_linux)
+   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+   canonical->sysno = gst->guest_GPR0;
+   canonical->arg1  = gst->guest_GPR3;
+   canonical->arg2  = gst->guest_GPR4;
+   canonical->arg3  = gst->guest_GPR5;
+   canonical->arg4  = gst->guest_GPR6;
+   canonical->arg5  = gst->guest_GPR7;
+   canonical->arg6  = gst->guest_GPR8;
+   canonical->arg7  = 0;  /* no register carries arg7/arg8 here */
+   canonical->arg8  = 0;
+
+
+#elif defined(VGP_ppc32_aix5)
+   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+   canonical->sysno = gst->guest_GPR2;  /* AIX: number is in r2, not r0 */
+   canonical->arg1  = gst->guest_GPR3;
+   canonical->arg2  = gst->guest_GPR4;
+   canonical->arg3  = gst->guest_GPR5;
+   canonical->arg4  = gst->guest_GPR6;
+   canonical->arg5  = gst->guest_GPR7;
+   canonical->arg6  = gst->guest_GPR8;
+   canonical->arg7  = gst->guest_GPR9;
+   canonical->arg8  = gst->guest_GPR10;
+
+#elif defined(VGP_ppc64_aix5)
+   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+   canonical->sysno = gst->guest_GPR2;  /* AIX: number is in r2, not r0 */
+   canonical->arg1  = gst->guest_GPR3;
+   canonical->arg2  = gst->guest_GPR4;
+   canonical->arg3  = gst->guest_GPR5;
+   canonical->arg4  = gst->guest_GPR6;
+   canonical->arg5  = gst->guest_GPR7;
+   canonical->arg6  = gst->guest_GPR8;
+   canonical->arg7  = gst->guest_GPR9;
+   canonical->arg8  = gst->guest_GPR10;
+
+#else
+#  error "getSyscallArgsFromGuestState: unknown arch"
+#endif
+}
+
+static 
+void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
+                                    /*OUT*/VexGuestArchState* gst_vanilla )
+{  /* Write the syscall number and args into the guest registers. */
+#if defined(VGP_x86_linux)
+   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
+   gst->guest_EAX = canonical->sysno;
+   gst->guest_EBX = canonical->arg1;
+   gst->guest_ECX = canonical->arg2;
+   gst->guest_EDX = canonical->arg3;
+   gst->guest_ESI = canonical->arg4;
+   gst->guest_EDI = canonical->arg5;
+   gst->guest_EBP = canonical->arg6;  /* arg7/arg8 are not written */
+
+#elif defined(VGP_amd64_linux)
+   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
+   gst->guest_RAX = canonical->sysno;
+   gst->guest_RDI = canonical->arg1;
+   gst->guest_RSI = canonical->arg2;
+   gst->guest_RDX = canonical->arg3;
+   gst->guest_R10 = canonical->arg4;
+   gst->guest_R8  = canonical->arg5;
+   gst->guest_R9  = canonical->arg6;  /* arg7/arg8 are not written */
+
+#elif defined(VGP_ppc32_linux)
+   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+   gst->guest_GPR0 = canonical->sysno;
+   gst->guest_GPR3 = canonical->arg1;
+   gst->guest_GPR4 = canonical->arg2;
+   gst->guest_GPR5 = canonical->arg3;
+   gst->guest_GPR6 = canonical->arg4;
+   gst->guest_GPR7 = canonical->arg5;
+   gst->guest_GPR8 = canonical->arg6;  /* arg7/arg8 are not written */
+
+#elif defined(VGP_ppc64_linux)
+   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+   gst->guest_GPR0 = canonical->sysno;
+   gst->guest_GPR3 = canonical->arg1;
+   gst->guest_GPR4 = canonical->arg2;
+   gst->guest_GPR5 = canonical->arg3;
+   gst->guest_GPR6 = canonical->arg4;
+   gst->guest_GPR7 = canonical->arg5;
+   gst->guest_GPR8 = canonical->arg6;  /* arg7/arg8 are not written */
+
+#elif defined(VGP_ppc32_aix5)
+   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+   gst->guest_GPR2  = canonical->sysno;  /* AIX: number goes in r2 */
+   gst->guest_GPR3  = canonical->arg1;
+   gst->guest_GPR4  = canonical->arg2;
+   gst->guest_GPR5  = canonical->arg3;
+   gst->guest_GPR6  = canonical->arg4;
+   gst->guest_GPR7  = canonical->arg5;
+   gst->guest_GPR8  = canonical->arg6;
+   gst->guest_GPR9  = canonical->arg7;
+   gst->guest_GPR10 = canonical->arg8;
+
+#elif defined(VGP_ppc64_aix5)
+   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+   gst->guest_GPR2  = canonical->sysno;  /* AIX: number goes in r2 */
+   gst->guest_GPR3  = canonical->arg1;
+   gst->guest_GPR4  = canonical->arg2;
+   gst->guest_GPR5  = canonical->arg3;
+   gst->guest_GPR6  = canonical->arg4;
+   gst->guest_GPR7  = canonical->arg5;
+   gst->guest_GPR8  = canonical->arg6;
+   gst->guest_GPR9  = canonical->arg7;
+   gst->guest_GPR10 = canonical->arg8;
+
+#else
+#  error "putSyscallArgsIntoGuestState: unknown arch"
+#endif
+}
+
+static
+void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
+                                      /*IN*/ VexGuestArchState* gst_vanilla )
+{  /* Build an SsComplete status from the guest's result register(s). */
+#  if defined(VGP_x86_linux)
+   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
+   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
+   canonical->what = SsComplete;
+
+#  elif defined(VGP_amd64_linux)
+   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
+   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
+   canonical->what = SsComplete;
+
+#  elif defined(VGP_ppc32_linux)
+   VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
+   UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
+   UInt                cr0so = (cr >> 28) & 1;  /* bit 28 of CR is CR0.SO */
+   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
+   canonical->what = SsComplete;
+
+#  elif defined(VGP_ppc64_linux)
+   VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
+   UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
+   UInt                cr0so = (cr >> 28) & 1;  /* bit 28 of CR is CR0.SO */
+   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
+   canonical->what = SsComplete;
+
+#  elif defined(VGP_ppc32_aix5)
+   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+   canonical->sres = VG_(mk_SysRes_ppc32_aix5)( gst->guest_GPR3, 
+                                                gst->guest_GPR4 );
+   canonical->what = SsComplete;
+
+#  elif defined(VGP_ppc64_aix5)
+   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+   canonical->sres = VG_(mk_SysRes_ppc64_aix5)( gst->guest_GPR3, 
+                                                gst->guest_GPR4 );
+   canonical->what = SsComplete;
+
+#  else
+#    error "getSyscallStatusFromGuestState: unknown arch"
+#  endif
+}
+
+static 
+void putSyscallStatusIntoGuestState ( /*IN*/ SyscallStatus*     canonical,
+                                      /*OUT*/VexGuestArchState* gst_vanilla )
+{  /* Write a completed syscall's outcome into the guest's result register(s). */
+#  if defined(VGP_x86_linux)
+   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
+   vg_assert(canonical->what == SsComplete);
+   if (canonical->sres.isError) {
+      /* This isn't exactly right, in that really a Failure with err
+         not in the range 1 .. 4095 is unrepresentable in the
+         Linux-x86 scheme.  Oh well. */
+      gst->guest_EAX = - (Int)canonical->sres.err;
+   } else {
+      gst->guest_EAX = canonical->sres.res;
+   }
+
+#  elif defined(VGP_amd64_linux)
+   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
+   vg_assert(canonical->what == SsComplete);
+   if (canonical->sres.isError) {
+      /* This isn't exactly right, in that really a Failure with err
+         not in the range 1 .. 4095 is unrepresentable in the
+         Linux-amd64 scheme.  Oh well. */
+      gst->guest_RAX = - (Long)canonical->sres.err;
+   } else {
+      gst->guest_RAX = canonical->sres.res;
+   }
+
+#  elif defined(VGP_ppc32_linux)
+   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+   UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
+   vg_assert(canonical->what == SsComplete);
+   if (canonical->sres.isError) {
+      /* set CR0.SO */
+      LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
+      gst->guest_GPR3 = canonical->sres.err;  /* error code is not negated */
+   } else {
+      /* clear CR0.SO */
+      LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
+      gst->guest_GPR3 = canonical->sres.res;
+   }
+
+#  elif defined(VGP_ppc64_linux)
+   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+   UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
+   vg_assert(canonical->what == SsComplete);
+   if (canonical->sres.isError) {
+      /* set CR0.SO */
+      LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
+      gst->guest_GPR3 = canonical->sres.err;  /* error code is not negated */
+   } else {
+      /* clear CR0.SO */
+      LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
+      gst->guest_GPR3 = canonical->sres.res;
+   }
+
+#  elif defined(VGP_ppc32_aix5)
+   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
+   vg_assert(canonical->what == SsComplete);
+   gst->guest_GPR3 = canonical->sres.res;  /* both written unconditionally */
+   gst->guest_GPR4 = canonical->sres.err;
+
+#  elif defined(VGP_ppc64_aix5)
+   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
+   vg_assert(canonical->what == SsComplete);
+   gst->guest_GPR3 = canonical->sres.res;  /* both written unconditionally */
+   gst->guest_GPR4 = canonical->sres.err;
+
+#  else
+#    error "putSyscallStatusIntoGuestState: unknown arch"
+#  endif
+}
+
+
+/* Tell me the offsets in the guest state of the syscall params, so
+   that the scalar argument checkers don't have to have this info
+   hardwired. */
+
+static
+void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
+{
+#if defined(VGP_x86_linux)
+   layout->o_sysno  = OFFSET_x86_EAX;
+   layout->o_arg1   = OFFSET_x86_EBX;
+   layout->o_arg2   = OFFSET_x86_ECX;
+   layout->o_arg3   = OFFSET_x86_EDX;
+   layout->o_arg4   = OFFSET_x86_ESI;
+   layout->o_arg5   = OFFSET_x86_EDI;
+   layout->o_arg6   = OFFSET_x86_EBP;
+   layout->o_arg7   = -1; /* impossible value */
+   layout->o_arg8   = -1; /* impossible value */
+   layout->o_retval = OFFSET_x86_EAX;
+
+#elif defined(VGP_amd64_linux)
+   layout->o_sysno  = OFFSET_amd64_RAX;
+   layout->o_arg1   = OFFSET_amd64_RDI;
+   layout->o_arg2   = OFFSET_amd64_RSI;
+   layout->o_arg3   = OFFSET_amd64_RDX;
+   layout->o_arg4   = OFFSET_amd64_R10;
+   layout->o_arg5   = OFFSET_amd64_R8;
+   layout->o_arg6   = OFFSET_amd64_R9;
+   layout->o_arg7   = -1; /* impossible value */
+   layout->o_arg8   = -1; /* impossible value */
+   layout->o_retval = OFFSET_amd64_RAX;
+
+#elif defined(VGP_ppc32_linux)
+   layout->o_sysno  = OFFSET_ppc32_GPR0;
+   layout->o_arg1   = OFFSET_ppc32_GPR3;
+   layout->o_arg2   = OFFSET_ppc32_GPR4;
+   layout->o_arg3   = OFFSET_ppc32_GPR5;
+   layout->o_arg4   = OFFSET_ppc32_GPR6;
+   layout->o_arg5   = OFFSET_ppc32_GPR7;
+   layout->o_arg6   = OFFSET_ppc32_GPR8;
+   layout->o_arg7   = -1; /* impossible value */
+   layout->o_arg8   = -1; /* impossible value */
+   layout->o_retval = OFFSET_ppc32_GPR3;
+
+#elif defined(VGP_ppc64_linux)
+   layout->o_sysno  = OFFSET_ppc64_GPR0;
+   layout->o_arg1   = OFFSET_ppc64_GPR3;
+   layout->o_arg2   = OFFSET_ppc64_GPR4;
+   layout->o_arg3   = OFFSET_ppc64_GPR5;
+   layout->o_arg4   = OFFSET_ppc64_GPR6;
+   layout->o_arg5   = OFFSET_ppc64_GPR7;
+   layout->o_arg6   = OFFSET_ppc64_GPR8;
+   layout->o_arg7   = -1; /* impossible value */
+   layout->o_arg8   = -1; /* impossible value */
+   layout->o_retval = OFFSET_ppc64_GPR3;
+
+#elif defined(VGP_ppc32_aix5)
+   layout->o_sysno  = OFFSET_ppc32_GPR2;
+   layout->o_arg1   = OFFSET_ppc32_GPR3;
+   layout->o_arg2   = OFFSET_ppc32_GPR4;
+   layout->o_arg3   = OFFSET_ppc32_GPR5;
+   layout->o_arg4   = OFFSET_ppc32_GPR6;
+   layout->o_arg5   = OFFSET_ppc32_GPR7;
+   layout->o_arg6   = OFFSET_ppc32_GPR8;
+   layout->o_arg7   = OFFSET_ppc32_GPR9;
+   layout->o_arg8   = OFFSET_ppc32_GPR10;
+   layout->o_retval = OFFSET_ppc32_GPR3;
+
+#elif defined(VGP_ppc64_aix5)
+   layout->o_sysno  = OFFSET_ppc64_GPR2;
+   layout->o_arg1   = OFFSET_ppc64_GPR3;
+   layout->o_arg2   = OFFSET_ppc64_GPR4;
+   layout->o_arg3   = OFFSET_ppc64_GPR5;
+   layout->o_arg4   = OFFSET_ppc64_GPR6;
+   layout->o_arg5   = OFFSET_ppc64_GPR7;
+   layout->o_arg6   = OFFSET_ppc64_GPR8;
+   layout->o_arg7   = OFFSET_ppc64_GPR9;
+   layout->o_arg8   = OFFSET_ppc64_GPR10;
+   layout->o_retval = OFFSET_ppc64_GPR3;
+
+#else
+#  error "getSyscallLayout: unknown arch"
+#endif
+}
+
+
+/* ---------------------------------------------------------------------
+   The main driver logic
+   ------------------------------------------------------------------ */
+
+/* Finding the handlers for a given syscall, or faking up one
+   when no handler is found. */
+
+static 
+void bad_before ( ThreadId              tid,
+                  SyscallArgLayout*     layout,
+                  /*MOD*/SyscallArgs*   args,
+                  /*OUT*/SyscallStatus* status,
+                  /*OUT*/UWord*         flags )
+{
+   /* Stand-in pre-handler for syscalls that have no wrapper:
+      complain, optionally print a stack trace, then fail the
+      syscall with ENOSYS. */
+   ULong unhandled = (ULong)args->sysno;
+   VG_(message)(Vg_DebugMsg,"WARNING: unhandled syscall: %llu", unhandled);
+#  if defined(VGO_aix5)
+   VG_(message)(Vg_DebugMsg,"           name of syscall: \"%s\"",
+                VG_(aix5_sysno_to_sysname)(args->sysno));
+#  endif
+   if (VG_(clo_verbosity) > 1) {
+      VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
+   }
+   VG_(message)(Vg_DebugMsg,"You may be able to write your own handler.");
+   VG_(message)(Vg_DebugMsg,"Read the file README_MISSING_SYSCALL_OR_IOCTL.");
+   VG_(message)(Vg_DebugMsg,
+                "Nevertheless we consider this a bug.  Please report");
+   VG_(message)(Vg_DebugMsg,
+                "it at http://valgrind.org/support/bug_reports.html.");
+
+   SET_STATUS_Failure(VKI_ENOSYS);
+}
+
+static SyscallTableEntry bad_sys =  /* fallback entry for unhandled syscalls */
+   { bad_before, NULL };  /* first slot is bad_before; second slot is NULL */
+
+static const SyscallTableEntry* get_syscall_entry ( UInt syscallno )
+{
+   /* Look up the table entry for syscallno; fall back to the
+      bad_sys entry when no handler is registered for it. */
+   const SyscallTableEntry* found = NULL;
+
+#  if defined(VGO_linux)
+   if (syscallno < ML_(syscall_table_size)) {
+      if (ML_(syscall_table)[syscallno].before != NULL)
+         found = &ML_(syscall_table)[syscallno];
+   }
+#  elif defined(VGP_ppc32_aix5)
+   found = ML_(get_ppc32_aix5_syscall_entry) ( syscallno );
+#  elif defined(VGP_ppc64_aix5)
+   found = ML_(get_ppc64_aix5_syscall_entry) ( syscallno );
+#  else
+#    error Unknown OS
+#  endif
+
+   return found != NULL  ? found  : &bad_sys;
+}
+
+
+/* Add and remove signals from mask so that we end up telling the
+   kernel the state we actually want rather than what the client
+   wants. */
+static void sanitize_client_sigmask(vki_sigset_t *mask)
+{  /* Edits *mask in place: removes three signals from the set. */
+   VG_(sigdelset)(mask, VKI_SIGKILL);
+   VG_(sigdelset)(mask, VKI_SIGSTOP);
+   VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
+}
+
+typedef
+   struct {
+      SyscallArgs   orig_args;  /* args as first read from the guest state */
+      SyscallArgs   args;       /* working copy; the pre-handler may modify it */
+      SyscallStatus status;     /* SsIdle when no syscall is in progress */
+      UWord         flags;      /* zeroed before the pre-handler runs */
+   }
+   SyscallInfo;
+
+SyscallInfo syscallInfo[VG_N_THREADS];  /* one record per thread, indexed by tid */
+
+
+/* The scheduler needs to be able to zero out these records after a
+   fork, hence this is exported from m_syswrap. */
+void VG_(clear_syscallInfo) ( Int tid )
+{  /* Reset thread tid's record to all-zeroes with status SsIdle. */
+   vg_assert(tid >= 0 && tid < VG_N_THREADS);  /* tid must index syscallInfo[] */
+   VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
+   syscallInfo[tid].status.what = SsIdle;  /* set explicitly after zeroing */
+}
+
+static void ensure_initialised ( void )
+{
+   /* One-shot reset of every syscallInfo slot on the first call. */
+   static Bool init_done = False;
+   Int tid;
+   if (!init_done) {
+      init_done = True;
+      for (tid = 0; tid < VG_N_THREADS; tid++)
+         VG_(clear_syscallInfo)( tid );
+   }
+}
+
+/* --- This is the main function of this file. --- */
+
+void VG_(client_syscall) ( ThreadId tid )
+{
+   UWord                    sysno;
+   ThreadState*             tst;
+   const SyscallTableEntry* ent;
+   SyscallArgLayout         layout;
+   SyscallInfo*             sci;
+
+   ensure_initialised();
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(tid >= 1 && tid < VG_N_THREADS);
+   vg_assert(VG_(is_running_thread)(tid));
+
+   tst = VG_(get_ThreadState)(tid);
+
+   /* BEGIN ensure root thread's stack is suitably mapped */
+   /* In some rare circumstances, we may do the syscall without the
+      bottom page of the stack being mapped, because the stack pointer
+      was moved down just a few instructions before the syscall
+      instruction, and there have been no memory references since
+      then, that would cause a call to VG_(extend_stack) to have
+      happened.
+
+      In native execution that's OK: the kernel automagically extends
+      the stack's mapped area down to cover the stack pointer (or sp -
+      redzone, really).  In simulated normal execution that's OK too,
+      since any signals we get from accessing below the mapped area of
+      the (guest's) stack lead us to VG_(extend_stack), where we
+      simulate the kernel's stack extension logic.  But that leaves
+      the problem of entering a syscall with the SP unmapped.  Because
+      the kernel doesn't know that the segment immediately above SP is
+      supposed to be a grow-down segment, it causes the syscall to
+      fail, and thereby causes a divergence between native behaviour
+      (syscall succeeds) and simulated behaviour (syscall fails).
+
+      This is quite a rare failure mode.  It has only been seen
+      affecting calls to sys_readlink on amd64-linux, and even then it
+      requires a certain code sequence around the syscall to trigger
+      it.  Here is one:
+
+      extern int my_readlink ( const char* path );
+      asm(
+      ".text\n"
+      ".globl my_readlink\n"
+      "my_readlink:\n"
+      "\tsubq    $0x1008,%rsp\n"
+      "\tmovq    %rdi,%rdi\n"              // path is in rdi
+      "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
+      "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
+      "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
+      "\tsyscall\n"
+      "\taddq    $0x1008,%rsp\n"
+      "\tret\n"
+      ".previous\n"
+      );
+
+      For more details, see bug #156404
+      (https://bugs.kde.org/show_bug.cgi?id=156404).
+
+      The fix is actually very simple.  We simply need to call
+      VG_(extend_stack) for this thread, handing it the lowest
+      possible valid address for stack (sp - redzone), to ensure the
+      pages all the way down to that address, are mapped.  Because
+      this is a potentially expensive and frequent operation, we
+      filter in two ways:
+
+      First, only the main thread (tid=1) has a growdown stack.  So
+      ignore all others.  It is conceivable, although highly unlikely,
+      that the main thread exits, and later another thread is
+      allocated tid=1, but that's harmless, I believe;
+      VG_(extend_stack) will do nothing when applied to a non-root
+      thread.
+
+      Secondly, first call VG_(am_find_nsegment) directly, to see if
+      the page holding (sp - redzone) is mapped correctly.  If so, do
+      nothing.  This is almost always the case.  VG_(extend_stack)
+      calls VG_(am_find_nsegment) twice, so this optimisation -- and
+      that's all it is -- more or less halves the number of calls to
+      VG_(am_find_nsegment) required.
+
+      TODO: the test "seg->kind == SkAnonC" is really inadequate,
+      because although it tests whether the segment is mapped
+      _somehow_, it doesn't check that it has the right permissions
+      (r,w, maybe x) ?  We could test that here, but it will also be
+      necessary to fix the corresponding test in VG_(extend_stack).
+
+      All this guff is of course Linux-specific.  Hence the ifdef.
+   */
+#  if defined(VGO_linux)
+   if (tid == 1/*ROOT THREAD*/) {
+      Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
+      NSegment const* seg = VG_(am_find_nsegment)(stackMin);
+      if (seg && seg->kind == SkAnonC) {
+         /* stackMin is already mapped.  Nothing to do. */
+      } else {
+         (void)VG_(extend_stack)( stackMin,
+                                  tst->client_stack_szB );
+      }
+   }
+#  endif
+   /* END ensure root thread's stack is suitably mapped */
+
+   /* First off, get the syscall args and number.  This is a
+      platform-dependent action. */
+
+   sci = & syscallInfo[tid];
+   vg_assert(sci->status.what == SsIdle);
+
+   getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex );
+
+   /* Copy .orig_args to .args.  The pre-handler may modify .args, but
+      we want to keep the originals too, just in case. */
+   sci->args = sci->orig_args;
+
+   /* Save the syscall number in the thread state in case the syscall 
+      is interrupted by a signal. */
+   sysno = sci->orig_args.sysno;
+
+   /* The default what-to-do-next thing is hand the syscall to the
+      kernel, so we pre-set that here.  Set .sres to something
+      harmless looking (is irrelevant because .what is not
+      SsComplete.) */
+   sci->status.what = SsHandToKernel;
+   sci->status.sres = VG_(mk_SysRes_Error)(0);
+   sci->flags       = 0;
+
+   /* Fetch the syscall's handlers.  If no handlers exist for this
+      syscall, we are given dummy handlers which force an immediate
+      return with ENOSYS. */
+   ent = get_syscall_entry(sysno);
+
+   /* Fetch the layout information, which tells us where in the guest
+      state the syscall args reside.  This is a platform-dependent
+      action.  This info is needed so that the scalar syscall argument
+      checks (PRE_REG_READ calls) know which bits of the guest state
+      they need to inspect. */
+   getSyscallArgLayout( &layout );
+
+   /* Make sure the tmp signal mask matches the real signal mask;
+      sigsuspend may change this. */
+   vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
+
+   /* Right, we're finally ready to Party.  Call the pre-handler and
+      see what we get back.  At this point: 
+
+        sci->status.what  is Unset (we don't know yet).
+        sci->orig_args    contains the original args.
+        sci->args         is the same as sci->orig_args.
+        sci->flags        is zero.
+   */
+
+   PRINT("SYSCALL[%d,%d](%3lld) ", VG_(getpid)(), tid, (ULong)sysno);
+
+   /* Do any pre-syscall actions */
+   if (VG_(needs).syscall_wrapper) {
+      VG_TDICT_CALL(tool_pre_syscall, tid, sysno);
+   }
+
+   vg_assert(ent);
+   vg_assert(ent->before);
+   (ent->before)( tid,
+                  &layout, 
+                  &sci->args, &sci->status, &sci->flags );
+   
+   /* The pre-handler may have modified:
+         sci->args
+         sci->status
+         sci->flags
+      All else remains unchanged. 
+      Although the args may be modified, pre handlers are not allowed
+      to change the syscall number.
+   */
+   /* Now we proceed according to what the pre-handler decided. */
+   vg_assert(sci->status.what == SsHandToKernel
+             || sci->status.what == SsComplete);
+   vg_assert(sci->args.sysno == sci->orig_args.sysno);
+
+   if (sci->status.what == SsComplete && !sci->status.sres.isError) {
+      /* The pre-handler completed the syscall itself, declaring
+         success. */
+      if (sci->flags & SfNoWriteResult) {
+         PRINT(" --> [pre-success] NoWriteResult\n");
+      } else {
+         PRINT(" --> [pre-success] Success(0x%llx)\n",
+               (ULong)sci->status.sres.res );
+      }                                      
+      /* In this case the allowable flags are to ask for a signal-poll
+         and/or a yield after the call.  Changing the args isn't
+         allowed. */
+      vg_assert(0 == (sci->flags 
+                      & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
+      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
+   }
+
+   else
+   if (sci->status.what == SsComplete && sci->status.sres.isError) {
+      /* The pre-handler decided to fail syscall itself. */
+      PRINT(" --> [pre-fail] Failure(0x%llx)\n", (ULong)sci->status.sres.err );
+      /* In this case, the pre-handler is also allowed to ask for the
+         post-handler to be run anyway.  Changing the args is not
+         allowed. */
+      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
+      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
+   }
+
+   else
+   if (sci->status.what != SsHandToKernel) {
+      /* huh?! */
+      vg_assert(0);
+   }
+
+   else /* (sci->status.what == HandToKernel) */ {
+      /* Ok, this is the usual case -- and the complicated one.  There
+         are two subcases: sync and async.  async is the general case
+         and is to be used when there is any possibility that the
+         syscall might block [a fact that the pre-handler must tell us
+         via the sci->flags field.]  Because the tidying-away /
+         context-switch overhead of the async case could be large, if
+         we are sure that the syscall will not block, we fast-track it
+         by doing it directly in this thread, which is a lot
+         simpler. */
+
+      /* Check that the given flags are allowable: MayBlock, PollAfter
+         and PostOnFail are ok. */
+      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
+
+      if (sci->flags & SfMayBlock) {
+
+         /* Syscall may block, so run it asynchronously */
+         vki_sigset_t mask;
+
+         PRINT(" --> [async] ... \n");
+
+         mask = tst->sig_mask;
+         sanitize_client_sigmask(&mask);
+
+         /* Gack.  More impedance matching.  Copy the possibly
+            modified syscall args back into the guest state. */
+         vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
+         putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
+
+         /* Drop the lock */
+         VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
+
+         /* Do the call, which operates directly on the guest state,
+            not on our abstracted copies of the args/result. */
+         do_syscall_for_client(sysno, tst, &mask);
+
+         /* do_syscall_for_client may not return if the syscall was
+            interrupted by a signal.  In that case, flow of control is
+            first to m_signals.async_sighandler, which calls
+            VG_(fixup_guest_state_after_syscall_interrupted), which
+            fixes up the guest state, and possibly calls
+            VG_(post_syscall).  Once that's done, control drops back
+            to the scheduler.  */
+
+         /* Reacquire the lock */
+         VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
+
+         /* Even more impedance matching.  Extract the syscall status
+            from the guest state. */
+         getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
+         vg_assert(sci->status.what == SsComplete);
+
+         PRINT("SYSCALL[%d,%d](%3ld) ... [async] --> %s(0x%llx)\n",
+               VG_(getpid)(), tid, sysno, 
+               sci->status.sres.isError ? "Failure" : "Success",
+               sci->status.sres.isError ? (ULong)sci->status.sres.err
+                                        : (ULong)sci->status.sres.res );
+
+      } else {
+
+         /* run the syscall directly */
+         /* The pre-handler may have modified the syscall args, but
+            since we're passing values in ->args directly to the
+            kernel, there's no point in flushing them back to the
+            guest state.  Indeed doing so could be construed as
+            incorrect. */
+         SysRes sres 
+            = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2, 
+                                     sci->args.arg3, sci->args.arg4, 
+                                     sci->args.arg5, sci->args.arg6,
+                                     sci->args.arg7, sci->args.arg8 );
+         sci->status = convert_SysRes_to_SyscallStatus(sres);
+
+         PRINT("[sync] --> %s(0x%llx)\n",
+               sci->status.sres.isError ? "Failure" : "Success",
+               sci->status.sres.isError ? (ULong)sci->status.sres.err
+                                        : (ULong)sci->status.sres.res );
+      }
+   }
+
+   vg_assert(sci->status.what == SsComplete);
+
+   vg_assert(VG_(is_running_thread)(tid));
+
+   /* Dump the syscall result back in the guest state.  This is
+      a platform-specific action. */
+   if (!(sci->flags & SfNoWriteResult))
+      putSyscallStatusIntoGuestState( &sci->status, &tst->arch.vex );
+
+   /* Situation now:
+      - the guest state is now correctly modified following the syscall
+      - modified args, original args and syscall status are still
+        available in the syscallInfo[] entry for this syscall.
+
+      Now go on to do the post-syscall actions (read on down ..)
+   */
+   VG_(post_syscall)(tid);
+}
+
+
+/* Perform post syscall actions.  The expected state on entry is
+   precisely as at the end of VG_(client_syscall), that is:
+
+   - guest state up to date following the syscall
+   - modified args, original args and syscall status are still
+     available in the syscallInfo[] entry for this syscall.
+   - syscall status matches what's in the guest state.
+
+   There are two ways to get here: the normal way -- being called by
+   VG_(client_syscall), and the unusual way, from
+   VG_(fixup_guest_state_after_syscall_interrupted).
+*/
+void VG_(post_syscall) (ThreadId tid)
+{
+   ThreadState*             tst;
+   SyscallInfo*             sci;
+   const SyscallTableEntry* entry;
+   SyscallArgLayout         arg_layout;
+   SyscallStatus            test_status;
+   UWord                    sysno;
+   Bool                     want_post_handler;
+
+   /* The thread must exist and be the one currently running. */
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(tid >= 1 && tid < VG_N_THREADS);
+   vg_assert(VG_(is_running_thread)(tid));
+
+   tst = VG_(get_ThreadState)(tid);
+   sci = & syscallInfo[tid];
+
+   /* m_signals.sigvgkill_handler might call here even when not in a
+      syscall.  If the status is Idle or HandToKernel there is
+      nothing to post-process: mark the slot idle and leave. */
+   switch (sci->status.what) {
+      case SsIdle:
+      case SsHandToKernel:
+         sci->status.what = SsIdle;
+         return;
+      default:
+         break;
+   }
+
+   /* From here on the syscall must have completed. */
+   vg_assert(sci->status.what == SsComplete);
+
+   /* Cross-check our record of the result against what is actually
+      in the guest state.  That is only required when the result was
+      written there in the first place, that is, when SfNoWriteResult
+      is not set. */
+   getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
+   if (0 == (sci->flags & SfNoWriteResult)) {
+      vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
+   }
+
+   /* Pre-handlers are not allowed to change the syscall number, so
+      .args and .orig_args have to agree on it. */
+   vg_assert(sci->args.sysno == sci->orig_args.sysno);
+   sysno = sci->args.sysno;
+   entry = get_syscall_entry(sysno);
+
+   /* The arg layout supplies the guest-state offset of the return
+      value.  Tell the tool that it has been assigned, so it can
+      update shadow regs as necessary. */
+   getSyscallArgLayout( &arg_layout );
+   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, arg_layout.o_retval,
+                                                  sizeof(UWord) );
+
+   /* Status is Complete (asserted above).  The post-handler, if one
+      exists, runs on success, and on failure only when SfPostOnFail
+      is set. */
+   want_post_handler = !sci->status.sres.isError
+                       || (sci->flags & SfPostOnFail) != 0;
+   if (entry->after != NULL && want_post_handler) {
+      (entry->after)( tid, &sci->args, &sci->status );
+   }
+
+   /* The post-handler might have changed the status -- the one for
+      sys_open, for example, can change success into failure if the
+      kernel supplied a fd that it doesn't like -- so dump the result
+      back into the guest state once more. */
+   if (0 == (sci->flags & SfNoWriteResult)) {
+      putSyscallStatusIntoGuestState( &sci->status, &tst->arch.vex );
+   }
+
+   /* Let the tool do its own post-syscall actions, if it has any. */
+   if (VG_(needs).syscall_wrapper) {
+      VG_TDICT_CALL(tool_post_syscall, tid, sysno, sci->status.sres);
+   }
+
+   /* That is the syscall finished. */
+   vg_assert(sci->status.what == SsComplete);
+   sci->status.what = SsIdle;
+
+   /* Finally honour what the pre/post wrappers asked for:
+      SfPollAfter requests a poll for pending signals, SfYieldAfter
+      requests a yield. */
+   if (sci->flags & SfPollAfter) {
+      VG_(poll_signals)(tid);
+   }
+   if (sci->flags & SfYieldAfter) {
+      VG_(vg_yield)();
+   }
+}
+
+
+/* ---------------------------------------------------------------------
+   Dealing with syscalls which get interrupted by a signal:
+   VG_(fixup_guest_state_after_syscall_interrupted)
+   ------------------------------------------------------------------ */
+
+/* Syscalls done on behalf of the client are finally handed off to the
+   kernel in VG_(client_syscall) above, either by calling
+   do_syscall_for_client (the async case), or by calling
+   VG_(do_syscall) (the sync case).
+
+   If the syscall is not interrupted by a signal (it may block and
+   later unblock, but that's irrelevant here) then those functions
+   eventually return and so control is passed to VG_(post_syscall).
+   NB: not sure if the sync case can actually get interrupted, as it
+   operates with all signals masked.
+
+   However, the syscall may get interrupted by an async-signal.  In
+   that case do_syscall_for_client/VG_(do_syscall) do not
+   return.  Instead we wind up in m_signals.async_sighandler.  We need
+   to fix up the guest state so that, to the guest, it looks as though
+   the syscall was interrupted.  So async_sighandler calls here, and
+   this does the fixup.  Note that from here we wind up calling
+   VG_(post_syscall) too.
+*/
+
+
+/* These are addresses within ML_(do_syscall_for_client_WRK).  See
+   syscall-$PLAT.S for details.
+
+   VG_(fixup_guest_state_after_syscall_interrupted) compares the
+   (real) IP at the time of the signal against them, to work out how
+   far the syscall had got:
+
+      [setup, restart)       syscall not started yet
+      == restart             about to run the syscall, or the kernel
+                             restarted it
+      [complete, committed)  syscall complete, result not yet written
+                             to the guest state
+      [committed, finished)  result committed, signal mask not yet
+                             restored
+*/
+extern const Addr ML_(blksys_setup);
+extern const Addr ML_(blksys_restart);
+extern const Addr ML_(blksys_complete);
+extern const Addr ML_(blksys_committed);
+extern const Addr ML_(blksys_finished);
+
+
+/* Back up guest state to restart a system call.
+
+   'arch' is the architectural state of the thread concerned.  Its
+   program counter is moved back so that it points at the syscall
+   instruction again: by 2 bytes on x86/amd64 Linux, by 4 bytes on
+   ppc32/ppc64 Linux, and by reloading it from CIA_AT_SC on AIX.
+   The bytes then found at the program counter are checked against
+   that platform's syscall instruction; a mismatch produces a debug
+   message followed by an assertion failure.  Building for any other
+   platform is a compile-time error. */
+
+void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
+{
+#if defined(VGP_x86_linux)
+   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)
+
+   /* Make sure our caller is actually sane, and we're really backing
+      back over a syscall.  The message is printed before asserting
+      so that the address and the offending bytes get shown.
+
+      int $0x80 == CD 80
+   */
+   {
+      UChar *p = (UChar *)arch->vex.guest_EIP;
+      
+      if (p[0] != 0xcd || p[1] != 0x80)
+         VG_(message)(Vg_DebugMsg,
+                      "?! restarting over syscall at %#x %02x %02x\n",
+                      arch->vex.guest_EIP, p[0], p[1]); 
+
+      vg_assert(p[0] == 0xcd && p[1] == 0x80);
+   }
+
+#elif defined(VGP_amd64_linux)
+   arch->vex.guest_RIP -= 2;             // sizeof(syscall)
+
+   /* Make sure our caller is actually sane, and we're really backing
+      back over a syscall.  The message is printed before asserting
+      so that the address and the offending bytes get shown.
+
+      syscall == 0F 05
+   */
+   {
+      UChar *p = (UChar *)arch->vex.guest_RIP;
+      
+      if (p[0] != 0x0F || p[1] != 0x05)
+         VG_(message)(Vg_DebugMsg,
+                      "?! restarting over syscall at %#llx %02x %02x\n",
+                      arch->vex.guest_RIP, p[0], p[1]); 
+
+      vg_assert(p[0] == 0x0F && p[1] == 0x05);
+   }
+
+#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
+   arch->vex.guest_CIA -= 4;             // sizeof(sc), ppc32 and ppc64
+
+   /* Make sure our caller is actually sane, and we're really backing
+      back over a syscall.  The message is printed before asserting
+      so that the address and the offending bytes get shown.  (The
+      "+ 0ULL" below widens CIA so that it matches the %#llx format.)
+
+      sc == 44 00 00 02
+   */
+   {
+      UChar *p = (UChar *)arch->vex.guest_CIA;
+
+      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
+         VG_(message)(Vg_DebugMsg,
+                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
+                      arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);
+
+      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
+   }
+
+#elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
+   /* Hmm.  This is problematic, because on AIX the kernel resumes
+      after a syscall at LR, not at the insn following SC.  Hence
+      there is no obvious way to figure out where the SC is.  Current
+      solution is to have a pseudo-register in the guest state,
+      CIA_AT_SC, which holds the address of the most recent SC
+      executed.  Backing up to that syscall then simply involves
+      copying that value back into CIA (the program counter). */
+   arch->vex.guest_CIA = arch->vex.guest_CIA_AT_SC;
+
+   /* Make sure our caller is actually sane, and we're really backing
+      back over a syscall.  The message is printed before asserting
+      so that the address and the offending bytes get shown.
+
+      sc == 44 00 00 02
+   */
+   {
+      UChar *p = (UChar *)arch->vex.guest_CIA;
+
+      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
+         VG_(message)(Vg_DebugMsg,
+                      "?! restarting over syscall at %#lx %02x %02x %02x %02x\n",
+                      (UWord)arch->vex.guest_CIA, p[0], p[1], p[2], p[3]);
+
+      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
+   }
+
+#else
+#  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
+#endif
+}
+
+/* 
+   Fix up the guest state when a syscall is interrupted by a signal
+   and so has been forced to return 'sres'.
+
+   To do this, we determine the precise state of the syscall by
+   looking at the (real) IP at the time the signal happened.  The
+   syscall sequence looks like:
+
+     1. unblock signals
+     2. perform syscall
+     3. save result to guest state (EAX, RAX, R3+CR0.SO)
+     4. re-block signals
+
+   If a signal
+   happens at      Then     Why?
+   [1-2)           restart  nothing has happened (restart syscall)
+   [2]             restart  syscall hasn't started, or kernel wants to restart
+   [2-3)           save     syscall complete, but results not saved
+   [3-4)           done     syscall complete, results saved
+
+   Sometimes we never want to restart an interrupted syscall (because
+   sigaction says not to), so we only restart if "restart" is True.
+
+   This will also call VG_(post_syscall) if the syscall has actually
+   completed (either because it was interrupted, or because it
+   actually finished).  It will not call VG_(post_syscall) if the
+   syscall is set up for restart, which means that the pre-wrapper may
+   get called multiple times.
+*/
+
+void 
+VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid, 
+                                                  Addr     ip, 
+                                                  UWord    sysnum, 
+                                                  SysRes   sres,
+                                                  Bool     restart)
+{
+   /* 'sysnum' is not to be depended on (its value seems to depend on
+      the state the real syscall was in when it got interrupted), so
+      its only use is in the trace printed below.  Set 'debug' to
+      True to get that trace. */
+   static const Bool debug = False;
+
+   ThreadArchState* th_regs;
+   SyscallInfo*     sci;
+   SyscallStatus    canonical;
+   Bool             in_syscall_code;
+   Bool             before_syscall;
+   Bool             at_syscall;
+   Bool             result_unsaved;
+   Bool             result_saved;
+
+   if (debug) {
+      VG_(printf)( "interrupted_syscall %d: tid=%d, IP=0x%llx, "
+                   "restart=%s, sysret.isError=%s, sysret.val=%lld\n", 
+                   (Int)sysnum,
+                   (Int)tid,
+                   (ULong)ip, 
+                   restart ? "True" : "False", 
+                   sres.isError ? "True" : "False",
+                   (Long)(Word)(sres.isError ? sres.err : sres.res) );
+   }
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(tid >= 1 && tid < VG_N_THREADS);
+   vg_assert(VG_(is_running_thread)(tid));
+
+   th_regs = &VG_(get_ThreadState)(tid)->arch;
+   sci     = & syscallInfo[tid];
+
+   /* Everything below is decided by where the (real) IP was when
+      the signal arrived.  If it was outside the blksys_* range
+      altogether, it looks like we weren't in a syscall at all.
+      Hmm. */
+   in_syscall_code = ip >= ML_(blksys_setup) && ip < ML_(blksys_finished);
+   if (!in_syscall_code) {
+      VG_(printf)("  not in syscall (%#lx - %#lx)\n",
+                  ML_(blksys_setup), ML_(blksys_finished));
+      vg_assert(sci->status.what != SsIdle);
+      return;
+   }
+
+   /* Getting this far requires that the thread had started up the
+      syscall machinery by calling VG_(client_syscall). */
+   vg_assert(sci->status.what != SsIdle);
+
+   /* Classify the interrupt point. */
+   before_syscall = ip >= ML_(blksys_setup)     && ip < ML_(blksys_restart);
+   at_syscall     = ip == ML_(blksys_restart);
+   result_unsaved = ip >= ML_(blksys_complete)  && ip < ML_(blksys_committed);
+   result_saved   = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
+
+   if (before_syscall) {
+      /* The syscall hasn't even started, so go around again. */
+      if (debug) {
+         VG_(printf)("  not started: restart\n");
+      }
+      vg_assert(sci->status.what == SsHandToKernel);
+      ML_(fixup_guest_state_to_restart_syscall)(th_regs);
+   }
+   else if (at_syscall) {
+      /* Either the syscall was just about to run, or it was
+         interrupted and the kernel restarted it.  If the caller
+         does not want a restart, fail the syscall with EINTR and
+         finish it off; otherwise set up to go around again. */
+      if (!restart) {
+         canonical = convert_SysRes_to_SyscallStatus( 
+                        VG_(mk_SysRes_Error)( VKI_EINTR ) 
+                     );
+         if (0 == (sci->flags & SfNoWriteResult)) {
+            putSyscallStatusIntoGuestState( &canonical, &th_regs->vex );
+         }
+         sci->status = canonical;
+         VG_(post_syscall)(tid);
+      } else {
+         ML_(fixup_guest_state_to_restart_syscall)(th_regs);
+      }
+   }
+   else if (result_unsaved) {
+      /* The syscall is complete but its result has not been written
+         back yet, so write the SysRes we were given to the guest
+         state, then finish the syscall off. */
+      if (debug) {
+         VG_(printf)("  completed\n");
+      }
+      canonical = convert_SysRes_to_SyscallStatus( sres );
+      if (0 == (sci->flags & SfNoWriteResult)) {
+         putSyscallStatusIntoGuestState( &canonical, &th_regs->vex );
+      }
+      sci->status = canonical;
+      VG_(post_syscall)(tid);
+   }
+   else if (result_saved) {
+      /* The result is committed; only the signal mask has not been
+         restored, and we expect our caller (the signal handler)
+         will have fixed that up. */
+      if (debug) {
+         VG_(printf)("  all done\n");
+      }
+      VG_(post_syscall)(tid);
+   }
+   else {
+      VG_(core_panic)("?? strange syscall interrupt state?");
+   }
+
+   /* Whichever way it went, this syscall is now finished -- even if
+      ML_(fixup_guest_state_to_restart_syscall) was called, since
+      that merely re-positions the guest's IP for another go at it.
+      Record that fact. */
+   sci->status.what = SsIdle;
+}
+
+
+/* ---------------------------------------------------------------------
+   A place to store the where-to-call-when-really-done pointer
+   ------------------------------------------------------------------ */
+
+// Exit continuation: the function to call, once the final thread is
+// done, in order to shut the system down cleanly.  Is set once at
+// startup (in m_main) and never changes after that; it is NULL as
+// defined here.  This is all just a nasty hack to avoid calling
+// directly from m_syswrap to m_main at exit, since that would cause
+// m_main to become part of a module cycle, which is silly.
+void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
+       (ThreadId,VgSchedReturnCode)
+   = NULL;
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/