summaryrefslogtreecommitdiff
path: root/coregrind/m_dispatch/.svn/text-base/dispatch-ppc64-linux.S.svn-base
diff options
context:
space:
mode:
Diffstat (limited to 'coregrind/m_dispatch/.svn/text-base/dispatch-ppc64-linux.S.svn-base')
-rw-r--r--coregrind/m_dispatch/.svn/text-base/dispatch-ppc64-linux.S.svn-base661
1 files changed, 661 insertions, 0 deletions
diff --git a/coregrind/m_dispatch/.svn/text-base/dispatch-ppc64-linux.S.svn-base b/coregrind/m_dispatch/.svn/text-base/dispatch-ppc64-linux.S.svn-base
new file mode 100644
index 0000000..f04e4b5
--- /dev/null
+++ b/coregrind/m_dispatch/.svn/text-base/dispatch-ppc64-linux.S.svn-base
@@ -0,0 +1,661 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address. ---*/
+/*--- dispatch-ppc64-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2005-2009 Cerion Armour-Brown <cerion@open-works.co.uk>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_core_basics_asm.h"
+#include "pub_core_dispatch_asm.h"
+#include "pub_core_transtab_asm.h"
+#include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */
+
+
+/* References to globals via the TOC */
+
+/*
+ .globl vgPlain_tt_fast
+ .lcomm vgPlain_tt_fast,4,4
+ .type vgPlain_tt_fast, @object
+*/
+        .section ".toc","aw"
+/* Each ".tc" directive below creates one doubleword TOC slot holding
+   the address of the named global, so the code can reach these globals
+   with a single "ld rX,.tocent__NAME@toc(2)" through the TOC pointer
+   in r2 (ppc64 ELFv1 convention). */
+.tocent__vgPlain_tt_fast:
+        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
+.tocent__vgPlain_tt_fastN:
+        .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN
+.tocent__vgPlain_dispatch_ctr:
+        .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr
+.tocent__vgPlain_machine_ppc64_has_VMX:
+        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
+
+/*------------------------------------------------------------*/
+/*--- ---*/
+/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
+/*--- run all translations except no-redir ones. ---*/
+/*--- ---*/
+/*------------------------------------------------------------*/
+
+/*----------------------------------------------------*/
+/*--- Preamble (set everything up) ---*/
+/*----------------------------------------------------*/
+
+/* signature:
+UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+*/
+
+.section ".text"
+.align 2
+.globl VG_(run_innerloop)
+/* ppc64 ELFv1: the plain symbol names a function descriptor in ".opd"
+   (code address, TOC base, environment ptr); the dot-prefixed symbol
+   is the actual code entry point in ".text". */
+.section ".opd","aw"
+.align 3
+VG_(run_innerloop):
+.quad .VG_(run_innerloop),.TOC.@tocbase,0
+.previous
+.type .VG_(run_innerloop),@function
+.globl .VG_(run_innerloop)
+.VG_(run_innerloop):
+        /* r3 holds guest_state */
+        /* r4 holds do_profiling */
+
+        /* ----- entry point to VG_(run_innerloop) ----- */
+        /* PPC64 ABI: LR is saved at 16(parent_sp), CR at 8(parent_sp) */
+
+        /* Save lr, cr in the caller's frame, as the ABI prescribes */
+        mflr    0
+        std     0,16(1)
+        mfcr    0
+        std     0,8(1)
+
+        /* New stack frame: 624 bytes */
+        stdu    1,-624(1)  /* sp should maintain 16-byte alignment */
+
+        /* Save callee-saved registers... */
+
+        /* Floating-point reg save area : 144 bytes (f14..f31) */
+        stfd    31,616(1)
+        stfd    30,608(1)
+        stfd    29,600(1)
+        stfd    28,592(1)
+        stfd    27,584(1)
+        stfd    26,576(1)
+        stfd    25,568(1)
+        stfd    24,560(1)
+        stfd    23,552(1)
+        stfd    22,544(1)
+        stfd    21,536(1)
+        stfd    20,528(1)
+        stfd    19,520(1)
+        stfd    18,512(1)
+        stfd    17,504(1)
+        stfd    16,496(1)
+        stfd    15,488(1)
+        stfd    14,480(1)
+
+        /* General reg save area : 144 bytes (r14..r31) */
+        std     31,472(1)
+        std     30,464(1)
+        std     29,456(1)
+        std     28,448(1)
+        std     27,440(1)
+        std     26,432(1)
+        std     25,424(1)
+        std     24,416(1)
+        std     23,408(1)
+        std     22,400(1)
+        std     21,392(1)
+        std     20,384(1)
+        std     19,376(1)
+        std     18,368(1)
+        std     17,360(1)
+        std     16,352(1)
+        std     15,344(1)
+        std     14,336(1)
+        /* Probably not necessary to save r13 (thread-specific ptr),
+           as VEX stays clear of it... but what the hey. */
+        std     13,328(1)
+
+        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
+           The Linux kernel might not actually use VRSAVE for its intended
+           purpose, but it should be harmless to preserve anyway. */
+        /* r3, r4 are live here, so use r5 */
+        /* Skip all AltiVec state handling if the host has no VMX */
+        ld      5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+        ld      5,0(5)
+        cmpldi  5,0
+        beq     .LafterVMX1
+
+        /* VRSAVE save word : 32 bytes */
+        mfspr   5,256  /* vrsave reg is spr number 256 */
+        stw     5,324(1)
+
+        /* Alignment padding : 4 bytes */
+
+        /* Vector reg save area (quadword aligned) : 192 bytes
+           (v20..v31; stvx needs the address in regs, hence the li/stvx
+           pairs) */
+        li      5,304
+        stvx    31,5,1
+        li      5,288
+        stvx    30,5,1
+        li      5,272
+        stvx    29,5,1
+        li      5,256
+        stvx    28,5,1
+        li      5,240
+        stvx    27,5,1
+        li      5,224
+        stvx    26,5,1
+        li      5,208
+        stvx    25,5,1
+        li      5,192
+        stvx    24,5,1
+        li      5,176
+        stvx    23,5,1
+        li      5,160
+        stvx    22,5,1
+        li      5,144
+        stvx    21,5,1
+        li      5,128
+        stvx    20,5,1
+.LafterVMX1:
+
+        /* Local variable space... */
+
+        /* r3 holds guest_state */
+        /* r4 holds do_profiling */
+        mr      31,3
+        /* NOTE: read back as 152(1) in the dispatchers, after the extra
+           48-byte call frame is pushed below (104 + 48 = 152). */
+        std     3,104(1)  /* spill orig guest_state ptr */
+
+        /* 96(sp) used later to check FPSCR[RM] */
+        /* 88(sp) used later to load fpscr with zero */
+        /* 48:87(sp) free */
+
+        /* Linkage Area (reserved)
+           40(sp) : TOC
+           32(sp) : link editor doubleword
+           24(sp) : compiler doubleword
+           16(sp) : LR
+           8(sp)  : CR
+           0(sp)  : back-chain
+        */
+
+// CAB TODO: Use a caller-saved reg for orig guest_state ptr
+// - rem to set non-allocateable in isel.c
+
+        /* hold dispatch_ctr (=32bit value) in r29 */
+        ld      29,.tocent__vgPlain_dispatch_ctr@toc(2)
+        lwz     29,0(29)  /* 32-bit zero-extending load */
+
+        /* set host FPU control word to the default mode expected
+           by VEX-generated code.  See comments in libvex.h for
+           more info. */
+        /* => get zero into f3 (tedious)
+           fsub 3,3,3 is not a reliable way to do this, since if
+           f3 holds a NaN or similar then we don't necessarily
+           wind up with zero. */
+        li      5,0
+        stw     5,88(1)
+        lfs     3,88(1)
+        mtfsf   0xFF,3  /* fpscr = lo32 of f3 */
+
+        /* set host AltiVec control word to the default mode expected
+           by VEX-generated code. */
+        ld      5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+        ld      5,0(5)
+        cmpldi  5,0
+        beq     .LafterVMX2
+
+        vspltisw 3,0x0  /* generate zero */
+        mtvscr  3       /* VSCR := 0 (so e.g. VSCR[NJ] = 0) */
+.LafterVMX2:
+
+        /* make a stack frame for the code we are calling */
+        stdu    1,-48(1)
+
+        /* fetch %CIA into r3 (guest program counter, from guest state) */
+        ld      3,OFFSET_ppc64_CIA(31)
+
+        /* fall into main loop (the right one) */
+        /* r4 = do_profiling.  It's probably trashed after here,
+           but that's OK: we don't need it after here. */
+        cmplwi  4,0
+        beq     .VG_(run_innerloop__dispatch_unprofiled)
+        b       .VG_(run_innerloop__dispatch_profiled)
+        /*NOTREACHED*/
+
+
+/*----------------------------------------------------*/
+/*--- NO-PROFILING (standard) dispatcher ---*/
+/*----------------------------------------------------*/
+
+        .section ".text"
+        .align 2
+        .globl VG_(run_innerloop__dispatch_unprofiled)
+        .section ".opd","aw"
+        .align 3
+VG_(run_innerloop__dispatch_unprofiled):
+        .quad .VG_(run_innerloop__dispatch_unprofiled),.TOC.@tocbase,0
+        .previous
+        .type .VG_(run_innerloop__dispatch_unprofiled),@function
+        .globl .VG_(run_innerloop__dispatch_unprofiled)
+.VG_(run_innerloop__dispatch_unprofiled):
+        /* At entry: Live regs:
+             r1  (=sp)
+             r2  (toc pointer)
+             r3  (=CIA = next guest address)
+             r29 (=dispatch_ctr)
+             r31 (=guest_state)
+           Stack state:
+             152(r1) (=orig guest_state)
+             144(r1) (=var space for FPSCR[RM])
+        */
+        /* Has the guest state pointer been messed with?  If yes, exit.
+           Also set up & VG_(tt_fast) early in an attempt at better
+           scheduling. */
+        ld      9,152(1)  /* original guest_state ptr */
+        ld      5, .tocent__vgPlain_tt_fast@toc(2)  /* &VG_(tt_fast) */
+        cmpd    9,31
+        bne     .gsp_changed
+
+        /* save the jump address in the guest state */
+        std     3,OFFSET_ppc64_CIA(31)
+
+        /* Are we out of timeslice?  If yes, defer to scheduler. */
+        subi    29,29,1
+        cmpldi  29,0
+        beq     .counter_is_zero
+
+        /* try a fast lookup in the translation cache */
+        /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
+              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
+        /* rotate-left 62 == shift-right 2; then keep low
+           VG_TT_FAST_BITS bits */
+        rldicl  4,3, 62, 64-VG_TT_FAST_BITS  /* entry# */
+        sldi    4,4,4  /* entry# * sizeof(FastCacheEntry) (=16: two
+                          8-byte words, .guest and .host) */
+        add     5,5,4  /* & VG_(tt_fast)[entry#] */
+        ld      6,0(5)  /* .guest */
+        ld      7,8(5)  /* .host */
+        cmpd    3,6
+        bne     .fast_lookup_failed
+
+        /* Found a match.  Call .host. */
+        mtctr   7
+        bctrl
+
+        /* On return from guest code:
+           r3 holds destination (original) address.
+           r31 may be unchanged (guest_state), or may indicate further
+           details of the control transfer requested to *r3.
+        */
+        /* start over */
+        b       .VG_(run_innerloop__dispatch_unprofiled)
+        /*NOTREACHED*/
+        /* NOTE(review): this .size is emitted mid-function — control
+           still flows into the exit code further below; the declared
+           symbol size is therefore misleading (harmless to tools). */
+        .size VG_(run_innerloop), .-VG_(run_innerloop)
+
+
+/*----------------------------------------------------*/
+/*--- PROFILING dispatcher (can be much slower) ---*/
+/*----------------------------------------------------*/
+
+        .section ".text"
+        .align 2
+        .globl VG_(run_innerloop__dispatch_profiled)
+        .section ".opd","aw"
+        .align 3
+VG_(run_innerloop__dispatch_profiled):
+        .quad .VG_(run_innerloop__dispatch_profiled),.TOC.@tocbase,0
+        .previous
+        .type .VG_(run_innerloop__dispatch_profiled),@function
+        .globl .VG_(run_innerloop__dispatch_profiled)
+.VG_(run_innerloop__dispatch_profiled):
+        /* At entry: Live regs:
+             r1  (=sp)
+             r2  (toc pointer)
+             r3  (=CIA = next guest address)
+             r29 (=dispatch_ctr)
+             r31 (=guest_state)
+           Stack state:
+             152(r1) (=orig guest_state)
+             144(r1) (=var space for FPSCR[RM])
+        */
+        /* Has the guest state pointer been messed with?  If yes, exit.
+           Also set up & VG_(tt_fast) early in an attempt at better
+           scheduling. */
+        ld      9,152(1)  /* original guest_state ptr */
+        ld      5, .tocent__vgPlain_tt_fast@toc(2)  /* &VG_(tt_fast) */
+        cmpd    9,31
+        bne     .gsp_changed
+
+        /* save the jump address in the guest state */
+        std     3,OFFSET_ppc64_CIA(31)
+
+        /* Are we out of timeslice?  If yes, defer to scheduler. */
+        subi    29,29,1
+        cmpldi  29,0
+        beq     .counter_is_zero
+
+        /* try a fast lookup in the translation cache */
+        /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
+              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
+        rldicl  4,3, 62, 64-VG_TT_FAST_BITS  /* entry# */
+        sldi    4,4,4  /* entry# * sizeof(FastCacheEntry) */
+        add     5,5,4  /* & VG_(tt_fast)[entry#] */
+        ld      6,0(5)  /* .guest */
+        ld      7,8(5)  /* .host */
+        cmpd    3,6
+        bne     .fast_lookup_failed
+
+        /* increment bb profile counter VG_(tt_fastN)[x] (=32bit val) */
+        ld      9, .tocent__vgPlain_tt_fastN@toc(2)
+        srdi    4, 4,1  /* entry# * sizeof(UInt*): scale 16 back to 8 */
+        ldx     9, 9,4  /* r9 = VG_(tt_fastN)[VG_TT_HASH(addr)] */
+        lwz     6, 0(9)  /* (*(UInt*)r9)++ */
+        addi    6, 6,1
+        stw     6, 0(9)
+
+        /* Found a match.  Call .host. */
+        mtctr   7
+        bctrl
+
+        /* On return from guest code:
+           r3 holds destination (original) address.
+           r31 may be unchanged (guest_state), or may indicate further
+           details of the control transfer requested to *r3.
+        */
+        /* start over */
+        b       .VG_(run_innerloop__dispatch_profiled)
+        /*NOTREACHED*/
+        /* NOTE(review): wrong symbol — looks like a copy-paste from the
+           no-redir function below; presumably this was meant to close
+           the profiled dispatcher, not run_a_noredir_translation. */
+        .size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)
+
+
+/*----------------------------------------------------*/
+/*--- exit points ---*/
+/*----------------------------------------------------*/
+
+.gsp_changed:
+        /* Someone messed with the gsp (in r31).  Have to
+           defer to scheduler to resolve this.  dispatch ctr
+           is not yet decremented, so no need to increment. */
+        /* %CIA is NOT up to date here.  First, need to write
+           %r3 back to %CIA, but without trashing %r31 since
+           that holds the value we want to return to the scheduler.
+           Hence use %r5 transiently for the guest state pointer. */
+        ld      5,152(1)  /* original guest_state ptr */
+        std     3,OFFSET_ppc64_CIA(5)
+        mr      3,31  /* r3 = new gsp value (returned to scheduler) */
+        b       .run_innerloop_exit
+        /*NOTREACHED*/
+
+.counter_is_zero:
+        /* %CIA is up to date */
+        /* back out decrement of the dispatch counter */
+        addi    29,29,1
+        li      3,VG_TRC_INNER_COUNTERZERO  /* return code: timeslice done */
+        b       .run_innerloop_exit
+
+.fast_lookup_failed:
+        /* %CIA is up to date */
+        /* back out decrement of the dispatch counter */
+        addi    29,29,1
+        li      3,VG_TRC_INNER_FASTMISS  /* return code: tt_fast miss */
+        b       .run_innerloop_exit
+
+
+
+/* All exits from the dispatcher go through here.
+   r3 holds the return value (a VG_TRC_* code, or the new gsp).
+*/
+.run_innerloop_exit:
+        /* We're leaving.  Reset FPSCR, and check that nobody messed
+           with VSCR. */
+
+        /* Set fpscr back to a known state, since vex-generated code
+           may have messed with fpscr[rm].  (Build a zero in f3 via
+           memory; no red zone on ppc64, so bump sp temporarily.) */
+        li      5,0
+        addi    1,1,-16
+        stw     5,0(1)
+        lfs     3,0(1)
+        addi    1,1,16
+        mtfsf   0xFF,3  /* fpscr = f3 */
+
+        /* Using r11 - value used again further on, so don't trash! */
+        ld      11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+        ld      11,0(11)
+        cmpldi  11,0
+        beq     .LafterVMX8
+
+        /* Invariant check: VSCR[NJ] must still be 0 (it was zeroed at
+           entry via mtvscr).  If guest code set NJ, that's a violation. */
+        /* first generate 4x 0x00010000 (the NJ-bit mask) */
+        vspltisw 4,0x1   /* 4x 0x00000001 */
+        vspltisw 5,0x0   /* zero */
+        vsldoi  6,4,5,0x2  /* <<2*8 => 4x 0x00010000 */
+        /* retrieve VSCR and mask wanted bits */
+        mfvscr  7
+        vand    7,7,6    /* gives NJ flag */
+        vspltw  7,7,0x3  /* flags-word to all lanes */
+        vcmpequw. 8,6,7  /* CR[24] = 1 if v6 == v7, i.e. NJ is set */
+        bt      24,.invariant_violation  /* branch if NJ == 1 */
+.LafterVMX8:
+
+        /* otherwise we're OK */
+        b       .run_innerloop_exit_REALLY
+
+
+.invariant_violation:
+        li      3,VG_TRC_INVARIANT_FAILED
+        b       .run_innerloop_exit_REALLY
+
+.run_innerloop_exit_REALLY:
+        /* r3 holds VG_TRC_* value to return */
+
+        /* Return to parent stack (pop the 48-byte call frame; offsets
+           below are now relative to the 624-byte main frame). */
+        addi    1,1,48
+
+        /* Write ctr to VG_(dispatch_ctr) (=32bit value) */
+        ld      5,.tocent__vgPlain_dispatch_ctr@toc(2)
+        stw     29,0(5)
+
+        /* FIX(review): removed stale "lwz 0,44(1); mtcr 0" here.  CR was
+           saved at 8(parent_sp) = 632(1), not 44(1) (that offset is a
+           ppc32-era leftover pointing into the TOC linkage slot), and CR
+           is restored for real from 632(1) just before the blr below. */
+
+        /* Restore callee-saved registers... */
+
+        /* Floating-point regs */
+        lfd     31,616(1)
+        lfd     30,608(1)
+        lfd     29,600(1)
+        lfd     28,592(1)
+        lfd     27,584(1)
+        lfd     26,576(1)
+        lfd     25,568(1)
+        lfd     24,560(1)
+        lfd     23,552(1)
+        lfd     22,544(1)
+        lfd     21,536(1)
+        lfd     20,528(1)
+        lfd     19,520(1)
+        lfd     18,512(1)
+        lfd     17,504(1)
+        lfd     16,496(1)
+        lfd     15,488(1)
+        lfd     14,480(1)
+
+        /* General regs */
+        ld      31,472(1)
+        ld      30,464(1)
+        ld      29,456(1)
+        ld      28,448(1)
+        ld      27,440(1)
+        ld      26,432(1)
+        ld      25,424(1)
+        ld      24,416(1)
+        ld      23,408(1)
+        ld      22,400(1)
+        ld      21,392(1)
+        ld      20,384(1)
+        ld      19,376(1)
+        ld      18,368(1)
+        ld      17,360(1)
+        ld      16,352(1)
+        ld      15,344(1)
+        ld      14,336(1)
+        ld      13,328(1)
+
+        /* r11 already holds VG_(machine_ppc64_has_VMX) value */
+        cmpldi  11,0
+        beq     .LafterVMX9
+
+        /* VRSAVE */
+        lwz     4,324(1)
+        /* FIX(review): was "mfspr 4,256", which READ vrsave into r4 and
+           threw away the saved value just loaded.  Restoring means
+           writing the saved word back to the SPR: */
+        mtspr   256,4  /* VRSAVE reg is spr number 256 */
+
+        /* Vector regs */
+        li      4,304
+        lvx     31,4,1
+        li      4,288
+        lvx     30,4,1
+        li      4,272
+        lvx     29,4,1
+        li      4,256
+        lvx     28,4,1
+        li      4,240
+        lvx     27,4,1
+        li      4,224
+        lvx     26,4,1
+        li      4,208
+        lvx     25,4,1
+        li      4,192
+        lvx     24,4,1
+        li      4,176
+        lvx     23,4,1
+        li      4,160
+        lvx     22,4,1
+        li      4,144
+        lvx     21,4,1
+        li      4,128
+        lvx     20,4,1
+.LafterVMX9:
+
+        /* reset cr, lr, sp (saved in the parent frame per the ABI) */
+        ld      0,632(1)  /* stack_size + 8 : CR */
+        mtcr    0
+        ld      0,640(1)  /* stack_size + 16 : LR */
+        mtlr    0
+        addi    1,1,624   /* stack_size */
+        blr
+
+
+/*------------------------------------------------------------*/
+/*--- ---*/
+/*--- A special dispatcher, for running no-redir ---*/
+/*--- translations. Just runs the given translation once. ---*/
+/*--- ---*/
+/*------------------------------------------------------------*/
+
+/* signature:
+void VG_(run_a_noredir_translation) ( UWord* argblock );
+*/
+
+/* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args
+ and 2 to carry results:
+ 0: input: ptr to translation
+ 1: input: ptr to guest state
+ 2: output: next guest PC
+ 3: output: guest state pointer afterwards (== thread return code)
+*/
+.section ".text"
+.align 2
+.globl VG_(run_a_noredir_translation)
+.section ".opd","aw"
+.align 3
+VG_(run_a_noredir_translation):
+.quad .VG_(run_a_noredir_translation),.TOC.@tocbase,0
+.previous
+.type .VG_(run_a_noredir_translation),@function
+.globl .VG_(run_a_noredir_translation)
+.VG_(run_a_noredir_translation):
+        /* r3 = argblock: [0] in: translation ptr, [1] in: guest state,
+           [2] out: next guest PC, [3] out: guest state ptr afterwards */
+        /* save callee-save int regs, & lr */
+        stdu    1,-512(1)
+        std     14,256(1)
+        std     15,264(1)
+        std     16,272(1)
+        std     17,280(1)
+        std     18,288(1)
+        std     19,296(1)
+        std     20,304(1)
+        std     21,312(1)
+        std     22,320(1)
+        std     23,328(1)
+        std     24,336(1)
+        std     25,344(1)
+        std     26,352(1)
+        std     27,360(1)
+        std     28,368(1)
+        std     29,376(1)
+        std     30,384(1)
+        std     31,392(1)
+        /* r31 itself saved at 392; LR saved (via r31) at 400 */
+        mflr    31
+        std     31,400(1)
+        std     2,408(1)  /* also preserve R2, just in case .. */
+
+        /* keep argblock ptr; r31 := guest state, r30 := translation */
+        std     3,416(1)
+        ld      31,8(3)
+        ld      30,0(3)
+        mtlr    30
+        blrl    /* call the translation; it returns next PC in r3 */
+
+        /* write results back to argblock[2] and argblock[3] */
+        ld      4,416(1)
+        std     3, 16(4)
+        std     31,24(4)
+
+        /* restore everything saved above */
+        ld      14,256(1)
+        ld      15,264(1)
+        ld      16,272(1)
+        ld      17,280(1)
+        ld      18,288(1)
+        ld      19,296(1)
+        ld      20,304(1)
+        ld      21,312(1)
+        ld      22,320(1)
+        ld      23,328(1)
+        ld      24,336(1)
+        ld      25,344(1)
+        ld      26,352(1)
+        ld      27,360(1)
+        ld      28,368(1)
+        ld      29,376(1)
+        ld      30,384(1)
+        ld      31,400(1)  /* saved LR, via r31 ... */
+        mtlr    31
+        ld      31,392(1)  /* ... then the real r31 */
+        ld      2,408(1)  /* restore R2 (TOC ptr) */
+
+        addi    1,1,512
+        blr
+
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/