author    sewardj <sewardj@a5019735-40e9-0310-863c-91ae7b9d1cf9>  2005-10-12 10:09:23 +0000
committer sewardj <sewardj@a5019735-40e9-0310-863c-91ae7b9d1cf9>  2005-10-12 10:09:23 +0000
commit    5155dec2ac1d0855fcdc5060503e1d98a281b26a (patch)
tree      9ac3f6390ae6c3458ced4e6b8a42aae4c6d59e24 /cachegrind
parent    0b6941ee8667b5c92b0c553be14d3a1d8183c70d (diff)
Redo the way cachegrind generates instrumentation code, so that it can
deal with any IR that happens to show up. This makes it work on ppc32
and should fix occasionally-reported bugs on x86/amd64 where it bombs
due to having to deal with multiple data references in a single
instruction.

The new scheme is based around the idea of a queue of memory events
which are outstanding, in the sense that no IR has yet been generated
to do the relevant helper calls. The presence of the queue -- currently
16 entries deep -- gives cachegrind more scope for combining multiple
memory references into a single helper function call. As a result it
runs 3%-5% faster than the previous version, on x86.

This commit also changes the type of the tool interface function
'tool_discard_basic_block_info' and clarifies its meaning. See comments
in include/pub_tool_tooliface.h.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@4903 a5019735-40e9-0310-863c-91ae7b9d1cf9
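The queue scheme the message describes, reduced to a minimal standalone C sketch. Everything here (MemEvent, queue_add, queue_flush, QUEUE_DEPTH) is an illustrative name, not an identifier from cg_main.c:

#define QUEUE_DEPTH 16   /* mirrors the 16-entry queue described above */

typedef enum { Ev_Ir, Ev_Dr, Ev_Dw, Ev_Dm } EvKind;

typedef struct {
   EvKind        kind;   /* insn fetch, data read, write, or modify */
   int           size;   /* access size in bytes */
   unsigned long ea;     /* effective address (insn address for Ev_Ir) */
} MemEvent;

static MemEvent queue[QUEUE_DEPTH];
static int      queue_used = 0;

/* In the real tool, flushing walks the queue in order and emits the
   helper-call IR, combining adjacent events where possible; here it
   just empties the queue. */
static void queue_flush(void)
{
   queue_used = 0;
}

static void queue_add(EvKind kind, int size, unsigned long ea)
{
   if (queue_used == QUEUE_DEPTH)   /* no space for a new event: flush */
      queue_flush();
   queue[queue_used].kind = kind;
   queue[queue_used].size = size;
   queue[queue_used].ea   = ea;
   queue_used++;
}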
Diffstat (limited to 'cachegrind')
-rw-r--r--  cachegrind/cg_main.c  917
1 file changed, 601 insertions, 316 deletions
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index d98c7e3e..f36d3743 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -51,6 +51,9 @@
/*--- Constants ---*/
/*------------------------------------------------------------*/
+/* Set to 1 for very verbose debugging */
+#define DEBUG_CG 0
+
#define MIN_LINE_SIZE 16
#define FILE_LEN 256
#define FN_LEN 256
@@ -131,7 +134,6 @@ typedef struct _instr_info instr_info;
struct _instr_info {
Addr instr_addr;
UChar instr_len;
- UChar data_size;
lineCC* parent; // parent line-CC
};
@@ -292,8 +294,8 @@ static lineCC* get_lineCC(Addr origAddr)
static VG_REGPARM(1)
void log_1I_0D_cache_access(instr_info* n)
{
- //VG_(printf)("1I_0D : CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
- // n, n->instr_addr, n->instr_len);
+ //VG_(printf)("1I_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
+ // n, n->instr_addr, n->instr_len);
VGP_PUSHCC(VgpCacheSimulate);
cachesim_I1_doref(n->instr_addr, n->instr_len,
&n->parent->Ir.m1, &n->parent->Ir.m2);
@@ -302,62 +304,214 @@ void log_1I_0D_cache_access(instr_info* n)
}
static VG_REGPARM(2)
-void log_1I_1Dr_cache_access(instr_info* n, Addr data_addr)
+void log_2I_0D_cache_access(instr_info* n, instr_info* n2)
+{
+ //VG_(printf)("2I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
+ // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
+ // n, n->instr_addr, n->instr_len,
+ // n2, n2->instr_addr, n2->instr_len);
+ VGP_PUSHCC(VgpCacheSimulate);
+ cachesim_I1_doref(n->instr_addr, n->instr_len,
+ &n->parent->Ir.m1, &n->parent->Ir.m2);
+ n->parent->Ir.a++;
+ cachesim_I1_doref(n2->instr_addr, n2->instr_len,
+ &n2->parent->Ir.m1, &n2->parent->Ir.m2);
+ n2->parent->Ir.a++;
+ VGP_POPCC(VgpCacheSimulate);
+}
+
+static VG_REGPARM(3)
+void log_3I_0D_cache_access(instr_info* n, instr_info* n2, instr_info* n3)
{
- //VG_(printf)("1I_1Dr: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
- // n, n->instr_addr, n->instr_len, data_addr, n->data_size);
+ //VG_(printf)("3I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
+ // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
+ // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
+ // n, n->instr_addr, n->instr_len,
+ // n2, n2->instr_addr, n2->instr_len,
+ // n3, n3->instr_addr, n3->instr_len);
VGP_PUSHCC(VgpCacheSimulate);
cachesim_I1_doref(n->instr_addr, n->instr_len,
&n->parent->Ir.m1, &n->parent->Ir.m2);
n->parent->Ir.a++;
+ cachesim_I1_doref(n2->instr_addr, n2->instr_len,
+ &n2->parent->Ir.m1, &n2->parent->Ir.m2);
+ n2->parent->Ir.a++;
+ cachesim_I1_doref(n3->instr_addr, n3->instr_len,
+ &n3->parent->Ir.m1, &n3->parent->Ir.m2);
+ n3->parent->Ir.a++;
+ VGP_POPCC(VgpCacheSimulate);
+}
- cachesim_D1_doref(data_addr, n->data_size,
+static VG_REGPARM(3)
+void log_1I_1Dr_cache_access(instr_info* n, Addr data_addr, Word data_size)
+{
+ //VG_(printf)("1I_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
+ // " daddr=0x%010lx, dsize=%lu\n",
+ // n, n->instr_addr, n->instr_len, data_addr, data_size);
+ VGP_PUSHCC(VgpCacheSimulate);
+ cachesim_I1_doref(n->instr_addr, n->instr_len,
+ &n->parent->Ir.m1, &n->parent->Ir.m2);
+ n->parent->Ir.a++;
+
+ cachesim_D1_doref(data_addr, data_size,
&n->parent->Dr.m1, &n->parent->Dr.m2);
n->parent->Dr.a++;
VGP_POPCC(VgpCacheSimulate);
}
-static VG_REGPARM(2)
-void log_1I_1Dw_cache_access(instr_info* n, Addr data_addr)
+static VG_REGPARM(3)
+void log_1I_1Dw_cache_access(instr_info* n, Addr data_addr, Word data_size)
{
- //VG_(printf)("1I_1Dw: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
- // n, n->instr_addr, n->instr_len, data_addr, n->data_size);
+ //VG_(printf)("1I_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
+ // " daddr=0x%010lx, dsize=%lu\n",
+ // n, n->instr_addr, n->instr_len, data_addr, data_size);
VGP_PUSHCC(VgpCacheSimulate);
cachesim_I1_doref(n->instr_addr, n->instr_len,
&n->parent->Ir.m1, &n->parent->Ir.m2);
n->parent->Ir.a++;
- cachesim_D1_doref(data_addr, n->data_size,
+ cachesim_D1_doref(data_addr, data_size,
&n->parent->Dw.m1, &n->parent->Dw.m2);
n->parent->Dw.a++;
VGP_POPCC(VgpCacheSimulate);
}
static VG_REGPARM(3)
-void log_1I_2D_cache_access(instr_info* n, Addr data_addr1, Addr data_addr2)
+void log_0I_1Dr_cache_access(instr_info* n, Addr data_addr, Word data_size)
{
- //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
- // n, n->instr_addr, n->instr_len, data_addr1, data_addr2, n->data_size);
+ //VG_(printf)("0I_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
+ // n, data_addr, data_size);
VGP_PUSHCC(VgpCacheSimulate);
- cachesim_I1_doref(n->instr_addr, n->instr_len,
- &n->parent->Ir.m1, &n->parent->Ir.m2);
- n->parent->Ir.a++;
-
- cachesim_D1_doref(data_addr1, n->data_size,
+ cachesim_D1_doref(data_addr, data_size,
&n->parent->Dr.m1, &n->parent->Dr.m2);
n->parent->Dr.a++;
- cachesim_D1_doref(data_addr2, n->data_size,
+ VGP_POPCC(VgpCacheSimulate);
+}
+
+static VG_REGPARM(3)
+void log_0I_1Dw_cache_access(instr_info* n, Addr data_addr, Word data_size)
+{
+ //VG_(printf)("0I_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
+ // n, data_addr, data_size);
+ VGP_PUSHCC(VgpCacheSimulate);
+ cachesim_D1_doref(data_addr, data_size,
&n->parent->Dw.m1, &n->parent->Dw.m2);
n->parent->Dw.a++;
VGP_POPCC(VgpCacheSimulate);
}
/*------------------------------------------------------------*/
-/*--- Instrumentation ---*/
+/*--- Instrumentation types and structures ---*/
+/*------------------------------------------------------------*/
+
+/* Maintain an ordered list of memory events which are outstanding, in
+ the sense that no IR has yet been generated to do the relevant
+ helper calls. The BB is scanned top to bottom and memory events
+ are added to the end of the list, merging with the most recent
+ notified event where possible (Dw immediately following Dr and
+ having the same size and EA can be merged).
+
+ This merging is done so that for architectures which have
+ load-op-store instructions (x86, amd64), the insn is treated as if
+ it makes just one memory reference (a modify), rather than two (a
+ read followed by a write at the same address).
+
+ At various points the list will need to be flushed, that is, IR
+ generated from it. That must happen before any possible exit from
+ the block (the end, or an IRStmt_Exit). Flushing also takes place
+ when there is no space to add a new event.
+
+ If we require the simulation statistics to be up to date with
+ respect to possible memory exceptions, then the list would have to
+ be flushed before each memory reference. That would however lose
+ performance by inhibiting event-merging during flushing.
+
+ Flushing the list consists of walking it start to end and emitting
+ instrumentation IR for each event, in the order in which they
+ appear. It may be possible to emit a single call for two adjacent
+ events in order to reduce the number of helper function calls made.
+ For example, it could well be profitable to handle two adjacent Ir
+ events with a single helper call. */
+
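A concrete illustration of the merge rule described above, assuming an x86 load-op-store such as addl %eax,(%ecx) whose effective address is held in t1. The insn initially notifies three events:

   Ir  len  iaddr
   Dr  4    iaddr  EA=t1
   Dw  4    iaddr  EA=t1

Because the Dw immediately follows a Dr of the same size and EA, the queue ends up holding the merged form

   Ir  len  iaddr
   Dm  4    iaddr  EA=t1

and the insn is costed as one instruction fetch plus one data modify, rather than a separate read and write.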
+typedef
+ IRExpr
+ IRAtom;
+
+typedef
+ enum { Event_Ir=0, Event_Dr=1, Event_Dw=2, Event_Dm=3 }
+ EventKind;
+
+typedef
+ struct {
+ EventKind ekind;
+ Int size; /* ALL */
+ Addr64 iaddr; /* ALL. For Dr/Dw/Dm is & of parent insn. */
+ IRAtom* dataEA; /* Dr/Dw/Dm only */ /* IR ATOM ONLY */
+ }
+ Event;
+
+/* Up to this many unnotified events are allowed. Number is
+ arbitrary. Larger numbers allow more event merging to occur, but
+ potentially induce more spilling due to extending live ranges of
+ address temporaries. */
+#define N_EVENTS 16
+
+
+/* A struct which holds all the running state during instrumentation.
+ Mostly to avoid passing loads of parameters everywhere. */
+typedef
+ struct {
+ /* The current outstanding-memory-event list. */
+ Event events[N_EVENTS];
+ Int events_used;
+
+ /* The array of instr_info bins for the BB. */
+ BB_info* bbInfo;
+
+ /* Number of instr_info bins 'used' so far. */
+ Int bbInfo_i;
+
+ /* Not sure what this is for (jrs 20051009) */
+ Bool bbSeenBefore;
+
+ /* The output BB being constructed. */
+ IRBB* bbOut;
+ }
+ CgState;
+
+
+static Int index3 ( EventKind k1, EventKind k2, EventKind k3 )
+{
+ Int i1 = k1;
+ Int i2 = k2;
+ Int i3 = k3;
+ Int r;
+ tl_assert(i1 >= 0 && i1 < 4);
+ tl_assert(i2 >= 0 && i2 < 4);
+ tl_assert(i3 >= 0 && i3 < 4);
+ r = 16*i1 + 4*i2 + i3;
+ tl_assert(r >= 0 && r < 64);
+ return r;
+}
+
+static void show3 ( Int idx )
+{
+ HChar* names = "IRWM";
+ Int i1 = (idx >> 4) & 3;
+ Int i2 = (idx >> 2) & 3;
+ Int i3 = idx & 3;
+ VG_(printf)("%c%c%c", names[i1], names[i2], names[i3]);
+}
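A quick worked example for the two helpers above; the values follow directly from the definitions (illustrative usage, not part of the patch):

   Int idx = index3( Event_Ir, Event_Dr, Event_Dw );
   /* idx == 16*0 + 4*1 + 2 == 6 */
   show3( idx );   /* prints "IRW": names[] maps Ir->I, Dr->R, Dw->W, Dm->M */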
+
+static Int trigrams[64];
+
+
+/*------------------------------------------------------------*/
+/*--- Instrumentation main ---*/
/*------------------------------------------------------------*/
static
-BB_info* get_BB_info(IRBB* bbIn, Addr origAddr, Bool* bbSeenBefore)
+BB_info* get_BB_info(IRBB* bbIn, Addr origAddr, /*OUT*/Bool* bbSeenBefore)
{
Int i, n_instrs;
IRStmt* st;
@@ -389,143 +543,14 @@ BB_info* get_BB_info(IRBB* bbIn, Addr origAddr, Bool* bbSeenBefore)
return bbInfo;
}
-static
-Bool handleOneStatement(IRTypeEnv* tyenv, IRBB* bbOut, IRStmt* st, IRStmt* st2,
- Addr* instrAddr, UInt* instrLen,
- IRExpr** loadAddrExpr, IRExpr** storeAddrExpr,
- UInt* dataSize)
-{
- tl_assert(isFlatIRStmt(st));
-
- switch (st->tag) {
- case Ist_NoOp:
- case Ist_AbiHint:
- case Ist_Put:
- case Ist_PutI:
- case Ist_MFence:
- break;
-
- case Ist_Exit: {
- // This is a conditional jump. Most of the time, we want to add the
- // instrumentation before it, to ensure it gets executed. Eg, (1) if
- // this conditional jump is just before an IMark:
- //
- // t108 = Not1(t107)
- // [add instrumentation here]
- // if (t108) goto {Boring} 0x3A96637D:I32
- // ------ IMark(0x3A966370, 7) ------
- //
- // or (2) if this conditional jump is the last thing before the
- // block-ending unconditional jump:
- //
- // t111 = Not1(t110)
- // [add instrumentation here]
- // if (t111) goto {Boring} 0x3A96637D:I32
- // goto {Boring} 0x3A966370:I32
- //
- // One case (3) where we want the instrumentation after the conditional
- // jump is when the conditional jump is for an x86 REP instruction:
- //
- // ------ IMark(0x3A967F13, 2) ------
- // t1 = GET:I32(4)
- // t6 = CmpEQ32(t1,0x0:I32)
- // if (t6) goto {Boring} 0x3A967F15:I32 # ignore this cond jmp
- // t7 = Sub32(t1,0x1:I32)
- // PUT(4) = t7
- // ...
- // t56 = Not1(t55)
- // [add instrumentation here]
- // if (t56) goto {Boring} 0x3A967F15:I32
- //
- // Therefore, we return true if the next statement is an IMark, or if
- // there is no next statement (which matches case (2), as the final
- // unconditional jump is not represented in the IRStmt list).
- //
- // Note that this approach won't do in the long run for supporting
- // PPC, but it's good enough for x86/AMD64 for the 3.0.X series.
- if (NULL == st2 || Ist_IMark == st2->tag)
- return True;
- else
- return False;
- }
-
- case Ist_IMark:
- /* st->Ist.IMark.addr is a 64-bit int. ULong_to_Ptr casts this
- to the host's native pointer type; if that is 32 bits then it
- discards the upper 32 bits. If we are cachegrinding on a
- 32-bit host then we are also ensured that the guest word size
- is 32 bits, due to the assertion in cg_instrument that the
- host and guest word sizes must be the same. Hence
- st->Ist.IMark.addr will have been derived from a 32-bit guest
- code address and truncation of it is safe. I believe this
- assignment should be correct for both 32- and 64-bit
- machines. */
- *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
- *instrLen = st->Ist.IMark.len;
- break;
-
- case Ist_Tmp: {
- IRExpr* data = st->Ist.Tmp.data;
- if (data->tag == Iex_Load) {
- IRExpr* aexpr = data->Iex.Load.addr;
- tl_assert( isIRAtom(aexpr) );
- // Note also, endianness info is ignored. I guess that's not
- // interesting.
- // XXX: repe cmpsb does two loads... the first one is ignored here!
- //tl_assert( NULL == *loadAddrExpr ); // XXX: ???
- *loadAddrExpr = aexpr;
- *dataSize = sizeofIRType(data->Iex.Load.ty);
- }
- break;
- }
-
- case Ist_Store: {
- IRExpr* data = st->Ist.Store.data;
- IRExpr* aexpr = st->Ist.Store.addr;
- tl_assert( isIRAtom(aexpr) );
- tl_assert( NULL == *storeAddrExpr ); // XXX: ???
- *storeAddrExpr = aexpr;
- *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data));
- break;
- }
-
- case Ist_Dirty: {
- IRDirty* d = st->Ist.Dirty.details;
- if (d->mFx != Ifx_None) {
- /* This dirty helper accesses memory. Collect the
- details. */
- tl_assert(d->mAddr != NULL);
- tl_assert(d->mSize != 0);
- *dataSize = d->mSize;
- if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
- *loadAddrExpr = d->mAddr;
- if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
- *storeAddrExpr = d->mAddr;
- } else {
- tl_assert(d->mAddr == NULL);
- tl_assert(d->mSize == 0);
- }
- break;
- }
-
- default:
- VG_(printf)("\n");
- ppIRStmt(st);
- VG_(printf)("\n");
- VG_(tool_panic)("Cachegrind: unhandled IRStmt");
- }
-
- return False;
-}
static
-void do_details( instr_info* n, Bool bbSeenBefore,
- Addr instr_addr, Int instr_len, Int data_size )
+void init_instr_info( instr_info* n, Bool bbSeenBefore,
+ Addr instr_addr, Int instr_len )
{
if (bbSeenBefore) {
tl_assert( n->instr_addr == instr_addr );
tl_assert( n->instr_len == instr_len );
- tl_assert( n->data_size == data_size );
// Don't check that (n->parent == parent)... it's conceivable that
// the debug info might change; the other asserts should be enough to
// detect anything strange.
@@ -533,126 +558,318 @@ void do_details( instr_info* n, Bool bbSeenBefore,
lineCC* parent = get_lineCC(instr_addr);
n->instr_addr = instr_addr;
n->instr_len = instr_len;
- n->data_size = data_size;
n->parent = parent;
}
}
-static Bool loadStoreAddrsMatch(IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
+static void showEvent ( Event* ev )
{
- // I'm assuming that for 'modify' instructions, that Vex always makes
- // the loadAddrExpr and storeAddrExpr be of the same type, ie. both Tmp
- // expressions, or both Const expressions.
- tl_assert(isIRAtom(loadAddrExpr));
- tl_assert(isIRAtom(storeAddrExpr));
- return eqIRAtom(loadAddrExpr, storeAddrExpr);
+ switch (ev->ekind) {
+ case Event_Ir:
+ VG_(printf)("Ir %d 0x%llx\n", ev->size, ev->iaddr);
+ break;
+ case Event_Dr:
+ VG_(printf)("Dr %d 0x%llx EA=", ev->size, ev->iaddr);
+ ppIRExpr(ev->dataEA);
+ VG_(printf)("\n");
+ break;
+ case Event_Dw:
+ VG_(printf)("Dw %d 0x%llx EA=", ev->size, ev->iaddr);
+ ppIRExpr(ev->dataEA);
+ VG_(printf)("\n");
+ break;
+ case Event_Dm:
+ VG_(printf)("Dm %d 0x%llx EA=", ev->size, ev->iaddr);
+ ppIRExpr(ev->dataEA);
+ VG_(printf)("\n");
+ break;
+ default:
+ tl_assert(0);
+ break;
+ }
}
-// Instrumentation for the end of each original instruction.
-static
-void instrumentInstr(IRBB* bbOut, instr_info* i_node, Bool bbSeenBefore,
- UInt instrAddr, UInt instrLen, UInt dataSize,
- IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
+/* Reserve instr_info for the first mention of a new insn. */
+
+static instr_info* reserve_instr_info ( CgState* cgs )
{
- IRDirty* di;
- IRExpr *arg1, *arg2, *arg3, **argv;
- Int argc;
- Char* helperName;
- void* helperAddr;
- IRType wordTy;
-
- // Stay sane ...
- tl_assert(sizeof(HWord) == sizeof(void*));
- if (sizeof(HWord) == 4) {
- wordTy = Ity_I32;
- } else
- if (sizeof(HWord) == 8) {
- wordTy = Ity_I64;
- } else {
- VG_(tool_panic)("instrumentInstr: strange word size");
- }
+ instr_info* i_node;
+ tl_assert(cgs->bbInfo_i >= 0);
+ tl_assert(cgs->bbInfo_i < cgs->bbInfo->n_instrs);
+ i_node = &cgs->bbInfo->instrs[ cgs->bbInfo_i ];
+ cgs->bbInfo_i++;
+ return i_node;
+}
- if (loadAddrExpr)
- tl_assert(wordTy == typeOfIRExpr(bbOut->tyenv, loadAddrExpr));
- if (storeAddrExpr)
- tl_assert(wordTy == typeOfIRExpr(bbOut->tyenv, storeAddrExpr));
-
- // Large (eg. 28B, 108B, 512B on x86) data-sized instructions will be
- // done inaccurately, but they're very rare and this avoids errors from
- // hitting more than two cache lines in the simulation.
- if (dataSize > MIN_LINE_SIZE) dataSize = MIN_LINE_SIZE;
-
- // Setup 1st arg: instr_info node's address
- // Believed to be 64-bit clean
- do_details(i_node, bbSeenBefore, instrAddr, instrLen, dataSize );
- arg1 = mkIRExpr_HWord( (HWord)i_node );
-
- if (!loadAddrExpr && !storeAddrExpr) {
- // no load/store
- tl_assert(0 == dataSize);
- helperName = "log_1I_0D_cache_access";
- helperAddr = &log_1I_0D_cache_access;
- argc = 1;
- argv = mkIRExprVec_1(arg1);
-
- } else if (loadAddrExpr && !storeAddrExpr) {
- // load
- tl_assert( isIRAtom(loadAddrExpr) );
- helperName = "log_1I_1Dr_cache_access";
- helperAddr = &log_1I_1Dr_cache_access;
- argc = 2;
- arg2 = loadAddrExpr;
- argv = mkIRExprVec_2(arg1, arg2);
-
- } else if (!loadAddrExpr && storeAddrExpr) {
- // store
- tl_assert( isIRAtom(storeAddrExpr) );
- helperName = "log_1I_1Dw_cache_access";
- helperAddr = &log_1I_1Dw_cache_access;
- argc = 2;
- arg2 = storeAddrExpr;
- argv = mkIRExprVec_2(arg1, arg2);
-
- } else {
- tl_assert( loadAddrExpr && storeAddrExpr );
- tl_assert( isIRAtom(loadAddrExpr) );
- tl_assert( isIRAtom(storeAddrExpr) );
-
- if ( loadStoreAddrsMatch(loadAddrExpr, storeAddrExpr) ) {
- // modify
- helperName = "log_1I_1Dr_cache_access";
- helperAddr = &log_1I_1Dr_cache_access;
- argc = 2;
- arg2 = loadAddrExpr;
- argv = mkIRExprVec_2(arg1, arg2);
+/* Find the most recently allocated instr_info. */
+
+static instr_info* find_most_recent_instr_info ( CgState* cgs )
+{
+ tl_assert(cgs->bbInfo_i >= 0);
+ tl_assert(cgs->bbInfo_i <= cgs->bbInfo->n_instrs);
+ if (cgs->bbInfo_i == 0)
+ return NULL;
+ else
+ return &cgs->bbInfo->instrs[ cgs->bbInfo_i - 1 ];
+}
+
+
+/* Generate code for all outstanding memory events, and mark the queue
+ empty. Code is generated into cgs->bbOut, and this activity
+ 'consumes' slots in cgs->bbInfo. */
+
+static void flushEvents ( CgState* cgs )
+{
+ Int i, regparms;
+ Char* helperName;
+ void* helperAddr;
+ IRExpr** argv;
+ IRExpr* i_node_expr;
+ IRExpr* i_node2_expr;
+ IRExpr* i_node3_expr;
+ IRDirty* di;
+ instr_info* i_node;
+ instr_info* i_node2;
+ instr_info* i_node3;
+
+ for (i = 0; i < cgs->events_used-2; i++)
+ trigrams[ index3( cgs->events[i].ekind, cgs->events[i+1].ekind,
+ cgs->events[i+2].ekind ) ]++;
+
+ i = 0;
+ while (i < cgs->events_used) {
+
+ helperName = NULL;
+ helperAddr = NULL;
+ argv = NULL;
+ regparms = 0;
+
+ /* generate IR to notify event i and possibly the ones
+ immediately following it. */
+ tl_assert(i >= 0 && i < cgs->events_used);
+ if (DEBUG_CG) {
+ VG_(printf)(" flush ");
+ showEvent( &cgs->events[i] );
+ }
+
+ /* For any event we find the relevant instr_info. The following
+ assumes that Event_Ir is the first event to refer to any
+ specific insn, and so a new entry in the cgs->bbInfo->instrs
+ is allocated. All other events (Dr,Dw,Dm) must refer to the
+ most recently encountered IMark and so we use the
+ most-recently allocated instrs[] entry, which must exist. */
+
+ if (cgs->events[i].ekind == Event_Ir) {
+ /* allocate an instr_info and fill in its addr/size. */
+ i_node = reserve_instr_info( cgs );
+ tl_assert(i_node);
+ init_instr_info( i_node, cgs->bbSeenBefore,
+ (Addr)cgs->events[i].iaddr, /* i addr */
+ cgs->events[i].size /* i size */);
} else {
- // load/store
- helperName = "log_1I_2D_cache_access";
- helperAddr = &log_1I_2D_cache_access;
- argc = 3;
- arg2 = loadAddrExpr;
- arg3 = storeAddrExpr;
- argv = mkIRExprVec_3(arg1, arg2, arg3);
+ /* use the most-recently allocated i_node but don't mess with
+ its internals */
+ i_node = find_most_recent_instr_info( cgs );
+ /* it must actually exist */
+ tl_assert(i_node);
+ /* it must match the declared parent instruction of this
+ event. */
+ tl_assert(i_node->instr_addr == cgs->events[i].iaddr);
}
+
+ i_node_expr = mkIRExpr_HWord( (HWord)i_node );
+
+ /* Decide on helper fn to call and args to pass it, and advance
+ i appropriately. */
+ switch (cgs->events[i].ekind) {
+ case Event_Ir:
+ /* Merge with a following Dr/Dm if it is from this insn. */
+ if (i < cgs->events_used-1
+ && cgs->events[i+1].iaddr == cgs->events[i].iaddr
+ && (cgs->events[i+1].ekind == Event_Dr
+ || cgs->events[i+1].ekind == Event_Dm)) {
+ helperName = "log_1I_1Dr_cache_access";
+ helperAddr = &log_1I_1Dr_cache_access;
+ argv = mkIRExprVec_3( i_node_expr,
+ cgs->events[i+1].dataEA,
+ mkIRExpr_HWord( cgs->events[i+1].size ) );
+ regparms = 3;
+ i += 2;
+ }
+ /* Merge with a following Dw if it is from this insn. */
+ else
+ if (i < cgs->events_used-1
+ && cgs->events[i+1].iaddr == cgs->events[i].iaddr
+ && cgs->events[i+1].ekind == Event_Dw) {
+ helperName = "log_1I_1Dw_cache_access";
+ helperAddr = &log_1I_1Dw_cache_access;
+ argv = mkIRExprVec_3( i_node_expr,
+ cgs->events[i+1].dataEA,
+ mkIRExpr_HWord( cgs->events[i+1].size ) );
+ regparms = 3;
+ i += 2;
+ }
+ /* Merge with two following Irs if possible. */
+ else
+ if (i < cgs->events_used-2
+ && cgs->events[i+1].ekind == Event_Ir
+ && cgs->events[i+2].ekind == Event_Ir) {
+ helperName = "log_3I_0D_cache_access";
+ helperAddr = &log_3I_0D_cache_access;
+
+ i_node2 = reserve_instr_info( cgs );
+ tl_assert(i_node2);
+ init_instr_info( i_node2, cgs->bbSeenBefore,
+ (Addr)cgs->events[i+1].iaddr, /* i addr */
+ cgs->events[i+1].size /* i size */);
+ i_node2_expr = mkIRExpr_HWord( (HWord)i_node2 );
+
+ i_node3 = reserve_instr_info( cgs );
+ tl_assert(i_node3);
+ init_instr_info( i_node3, cgs->bbSeenBefore,
+ (Addr)cgs->events[i+2].iaddr, /* i addr */
+ cgs->events[i+2].size /* i size */);
+ i_node3_expr = mkIRExpr_HWord( (HWord)i_node3 );
+
+ argv = mkIRExprVec_3( i_node_expr, i_node2_expr, i_node3_expr );
+ regparms = 3;
+ i += 3;
+ }
+ /* Merge with a following Ir if possible. */
+ else
+ if (i < cgs->events_used-1
+ && cgs->events[i+1].ekind == Event_Ir) {
+ helperName = "log_2I_0D_cache_access";
+ helperAddr = &log_2I_0D_cache_access;
+ i_node2 = reserve_instr_info( cgs );
+ tl_assert(i_node2);
+ init_instr_info( i_node2, cgs->bbSeenBefore,
+ (Addr)cgs->events[i+1].iaddr, /* i addr */
+ cgs->events[i+1].size /* i size */);
+ i_node2_expr = mkIRExpr_HWord( (HWord)i_node2 );
+ argv = mkIRExprVec_2( i_node_expr, i_node2_expr );
+ regparms = 2;
+ i += 2;
+ }
+ /* No merging possible; emit as-is. */
+ else {
+ helperName = "log_1I_0D_cache_access";
+ helperAddr = &log_1I_0D_cache_access;
+ argv = mkIRExprVec_1( i_node_expr );
+ regparms = 1;
+ i++;
+ }
+ break;
+ case Event_Dr:
+ case Event_Dm:
+ helperName = "log_0I_1Dr_cache_access";
+ helperAddr = &log_0I_1Dr_cache_access;
+ argv = mkIRExprVec_3( i_node_expr,
+ cgs->events[i].dataEA,
+ mkIRExpr_HWord( cgs->events[i].size ) );
+ regparms = 3;
+ i++;
+ break;
+ case Event_Dw:
+ helperName = "log_0I_1Dw_cache_access";
+ helperAddr = &log_0I_1Dw_cache_access;
+ argv = mkIRExprVec_3( i_node_expr,
+ cgs->events[i].dataEA,
+ mkIRExpr_HWord( cgs->events[i].size ) );
+ regparms = 3;
+ i++;
+ break;
+ default:
+ tl_assert(0);
+ }
+
+ /* Add the helper. */
+ tl_assert(helperName);
+ tl_assert(helperAddr);
+ tl_assert(argv);
+ di = unsafeIRDirty_0_N( regparms, helperName, helperAddr, argv);
+ addStmtToIRBB( cgs->bbOut, IRStmt_Dirty(di) );
}
- // Add call to the instrumentation function
- di = unsafeIRDirty_0_N( argc, helperName, helperAddr, argv);
- addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
+ cgs->events_used = 0;
}
+
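To see how the loop above consumes the queue, consider a flush of five outstanding events where the Dr belongs to the first insn (a hypothetical stream; sizes omitted):

   Ir@a1  Dr@a1  Ir@a2  Ir@a3  Ir@a4

The first Ir merges with the following Dr from the same insn, emitting one call to log_1I_1Dr_cache_access and advancing i by 2; the remaining three Irs are emitted as a single log_3I_0D_cache_access call. Five events cost two helper calls instead of five.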
+static void addEvent_Ir ( CgState* cgs, Int size, Addr64 iaddr )
+{
+ Event* evt;
+ tl_assert(size >= 0 && size <= MIN_LINE_SIZE);
+ if (cgs->events_used == N_EVENTS)
+ flushEvents(cgs);
+ tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+ /* If vex fails to decode an insn, the size will be zero, but that
+ can't really be true -- the cpu couldn't have determined the
+ insn was undecodable without looking at it. Hence: */
+ if (size == 0)
+ size = 1;
+ evt = &cgs->events[cgs->events_used];
+ evt->ekind = Event_Ir;
+ evt->size = size;
+ evt->iaddr = iaddr;
+ evt->dataEA = NULL; /*paranoia*/
+ cgs->events_used++;
+}
+
+static void addEvent_Dr ( CgState* cgs, Int size, Addr64 iaddr, IRAtom* ea )
+{
+ Event* evt;
+ tl_assert(isIRAtom(ea));
+ tl_assert(size >= 1 && size <= MIN_LINE_SIZE);
+ if (cgs->events_used == N_EVENTS)
+ flushEvents(cgs);
+ tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+ evt = &cgs->events[cgs->events_used];
+ evt->ekind = Event_Dr;
+ evt->size = size;
+ evt->iaddr = iaddr;
+ evt->dataEA = ea;
+ cgs->events_used++;
+}
+
+static void addEvent_Dw ( CgState* cgs, Int size, Addr64 iaddr, IRAtom* ea )
+{
+ tl_assert(isIRAtom(ea));
+ tl_assert(size >= 1 && size <= MIN_LINE_SIZE);
+
+ /* Is it possible to merge this write into an immediately preceding
+ read? */
+ if (cgs->events_used > 0
+ && cgs->events[cgs->events_used-1].ekind == Event_Dr
+ && cgs->events[cgs->events_used-1].size == size
+ && cgs->events[cgs->events_used-1].iaddr == iaddr
+ && eqIRAtom(cgs->events[cgs->events_used-1].dataEA, ea)) {
+ cgs->events[cgs->events_used-1].ekind = Event_Dm;
+ return;
+ }
+
+ /* No. Add as normal. */
+ if (cgs->events_used == N_EVENTS)
+ flushEvents(cgs);
+ tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+ cgs->events[cgs->events_used].ekind = Event_Dw;
+ cgs->events[cgs->events_used].size = size;
+ cgs->events[cgs->events_used].iaddr = iaddr;
+ cgs->events[cgs->events_used].dataEA = ea;
+ cgs->events_used++;
+}
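The same read-into-modify rewrite, grafted onto the illustrative MemEvent queue sketched near the top of this page (again, not the tool's code; the real version above also checks that both events come from the same insn):

static void queue_add_write(int size, unsigned long ea)
{
   MemEvent* prev = queue_used > 0 ? &queue[queue_used-1] : 0;
   /* A write immediately after a read of the same size and EA is
      really a modify: rewrite the queued event in place. */
   if (prev && prev->kind == Ev_Dr
            && prev->size == size && prev->ea == ea) {
      prev->kind = Ev_Dm;
      return;
   }
   queue_add(Ev_Dw, size, ea);   /* otherwise queue as a plain write */
}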
+
+////////////////////////////////////////////////////////////
+
+
static IRBB* cg_instrument ( IRBB* bbIn, VexGuestLayout* layout,
IRType gWordTy, IRType hWordTy )
{
- Int i, dataSize = 0, bbInfo_i;
- IRBB* bbOut;
- IRStmt* st;
- BB_info* bbInfo;
- Bool bbSeenBefore = False, addedInstrumentation, addInstNow;
- Addr instrAddr, origAddr;
- UInt instrLen;
- IRExpr *loadAddrExpr, *storeAddrExpr;
+ Int i;
+ IRStmt* st;
+ Addr64 cia; /* address of current insn */
+ CgState cgs;
+ IRTypeEnv* tyenv = bbIn->tyenv;
+
if (gWordTy != hWordTy) {
/* We don't currently support this case. */
@@ -660,75 +877,132 @@ static IRBB* cg_instrument ( IRBB* bbIn, VexGuestLayout* layout,
}
/* Set up BB */
- bbOut = emptyIRBB();
- bbOut->tyenv = dopyIRTypeEnv(bbIn->tyenv);
- bbOut->next = dopyIRExpr(bbIn->next);
- bbOut->jumpkind = bbIn->jumpkind;
+ cgs.bbOut = emptyIRBB();
+ cgs.bbOut->tyenv = dopyIRTypeEnv(tyenv);
- // Get the first statement, and origAddr from it
+ // Get the first statement, and initial cia from it
i = 0;
tl_assert(bbIn->stmts_used > 0);
st = bbIn->stmts[0];
tl_assert(Ist_IMark == st->tag);
- origAddr = (Addr)st->Ist.IMark.addr;
- tl_assert(origAddr == st->Ist.IMark.addr); // XXX: check no overflow
-
- // Get block info
- bbInfo = get_BB_info(bbIn, origAddr, &bbSeenBefore);
- bbInfo_i = 0;
-
- do {
- // We should be at an IMark statement
- tl_assert(Ist_IMark == st->tag);
-
- // Reset stuff for this original instruction
- loadAddrExpr = storeAddrExpr = NULL;
- dataSize = 0;
- addedInstrumentation = False;
-
- // Process all the statements for this original instruction (ie. until
- // the next IMark statement, or the end of the block)
- do {
- IRStmt* st2 = ( i+1 < bbIn->stmts_used ? bbIn->stmts[i+1] : NULL );
-
- addInstNow = handleOneStatement(bbIn->tyenv, bbOut, st, st2,
- &instrAddr, &instrLen, &loadAddrExpr,
- &storeAddrExpr, &dataSize);
- if (addInstNow) {
- tl_assert(!addedInstrumentation);
- addedInstrumentation = True;
-
- // Nb: instrLen will be zero if Vex failed to decode it.
- // Also Client requests can appear to be very large (eg. 18
- // bytes on x86) because they are really multiple instructions.
- tl_assert( 0 == instrLen ||
- bbIn->jumpkind == Ijk_ClientReq ||
- (instrLen >= VG_MIN_INSTR_SZB &&
- instrLen <= VG_MAX_INSTR_SZB) );
-
- // Add instrumentation before this statement.
- instrumentInstr(bbOut, &bbInfo->instrs[ bbInfo_i ], bbSeenBefore,
- instrAddr, instrLen, dataSize, loadAddrExpr, storeAddrExpr);
+ cia = st->Ist.IMark.addr;
+
+ // Set up running state and get block info
+ cgs.events_used = 0;
+ cgs.bbInfo = get_BB_info(bbIn, (Addr)cia, &cgs.bbSeenBefore);
+ cgs.bbInfo_i = 0;
+
+ if (DEBUG_CG)
+ VG_(printf)("\n\n---------- cg_instrument ----------\n");
+
+ // Traverse the block, adding events and flushing as necessary.
+ for (i = 0; i < bbIn->stmts_used; i++) {
+
+ st = bbIn->stmts[i];
+ tl_assert(isFlatIRStmt(st));
+
+ switch (st->tag) {
+ case Ist_NoOp:
+ case Ist_AbiHint:
+ case Ist_Put:
+ case Ist_PutI:
+ case Ist_MFence:
+ break;
+
+ case Ist_IMark:
+ cia = st->Ist.IMark.addr;
+ addEvent_Ir( &cgs, st->Ist.IMark.len, cia );
+ break;
+
+ case Ist_Tmp: {
+ IRExpr* data = st->Ist.Tmp.data;
+ if (data->tag == Iex_Load) {
+ IRExpr* aexpr = data->Iex.Load.addr;
+ tl_assert( isIRAtom(aexpr) );
+ // Note also, endianness info is ignored. I guess
+ // that's not interesting.
+ addEvent_Dr( &cgs, sizeofIRType(data->Iex.Load.ty),
+ cia, aexpr );
+ }
+ break;
+ }
+
+ case Ist_Store: {
+ IRExpr* data = st->Ist.Store.data;
+ IRExpr* aexpr = st->Ist.Store.addr;
+ tl_assert( isIRAtom(aexpr) );
+ addEvent_Dw( &cgs,
+ sizeofIRType(typeOfIRExpr(tyenv, data)),
+ cia, aexpr );
+ break;
}
- addStmtToIRBB( bbOut, st );
+ case Ist_Dirty: {
+ Int dataSize;
+ IRDirty* d = st->Ist.Dirty.details;
+ if (d->mFx != Ifx_None) {
+ /* This dirty helper accesses memory. Collect the
+ details. */
+ tl_assert(d->mAddr != NULL);
+ tl_assert(d->mSize != 0);
+ dataSize = d->mSize;
+ // Large (eg. 28B, 108B, 512B on x86) data-sized
+ // instructions will be done inaccurately, but they're
+ // very rare and this avoids errors from hitting more
+ // than two cache lines in the simulation.
+ if (dataSize > MIN_LINE_SIZE)
+ dataSize = MIN_LINE_SIZE;
+ if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
+ addEvent_Dr( &cgs, dataSize, cia, d->mAddr );
+ if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
+ addEvent_Dw( &cgs, dataSize, cia, d->mAddr );
+ } else {
+ tl_assert(d->mAddr == NULL);
+ tl_assert(d->mSize == 0);
+ }
+ break;
+ }
- i++;
- st = st2;
- }
- while (st && Ist_IMark != st->tag);
+ case Ist_Exit:
+ /* We may never reach the next statement, so need to flush
+ all outstanding transactions now. */
+ flushEvents( &cgs );
+ break;
- if (!addedInstrumentation) {
- // Add instrumentation now, after all the instruction's statements.
- instrumentInstr(bbOut, &bbInfo->instrs[ bbInfo_i ], bbSeenBefore,
- instrAddr, instrLen, dataSize, loadAddrExpr, storeAddrExpr);
+ default:
+ tl_assert(0);
+ break;
}
- bbInfo_i++;
+ /* Copy the original statement */
+ addStmtToIRBB( cgs.bbOut, st );
+
+ if (DEBUG_CG) {
+ ppIRStmt(st);
+ VG_(printf)("\n");
+ }
}
- while (st);
- return bbOut;
+ /* At the end of the bb. Flush outstandings. */
+ tl_assert(isIRAtom(bbIn->next));
+ flushEvents( &cgs );
+
+ /* copy where-next stuff. */
+ cgs.bbOut->next = dopyIRExpr(bbIn->next);
+ cgs.bbOut->jumpkind = bbIn->jumpkind;
+
+ /* done. stay sane ... */
+ tl_assert(cgs.bbInfo_i == cgs.bbInfo->n_instrs);
+
+ if (DEBUG_CG) {
+ VG_(printf)( "goto {");
+ ppIRJumpKind(bbIn->jumpkind);
+ VG_(printf)( "} ");
+ ppIRExpr( bbIn->next );
+ VG_(printf)( "}\n");
+ }
+
+ return cgs.bbOut;
}
/*------------------------------------------------------------*/
@@ -1077,6 +1351,12 @@ static void cg_fini(Int exitcode)
VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
}
VGP_POPCC(VgpCacheResults);
+
+ if (0) { Int i;
+ for (i = 0; i < 64; i++) {
+ show3(i); VG_(printf)(" %5d\n", trigrams[i] );
+ }
+ }
}
/*--------------------------------------------------------------------*/
@@ -1084,15 +1364,20 @@ static void cg_fini(Int exitcode)
/*--------------------------------------------------------------------*/
// Called when a translation is invalidated due to code unloading.
-static void cg_discard_basic_block_info ( Addr a, SizeT size )
+static void cg_discard_basic_block_info ( VexGuestExtents vge )
{
- VgHashNode* bbInfo;
+ VgHashNode* bbInfo;
- if (0) VG_(printf)( "discard_basic_block_info: %p, %llu\n", a, (ULong)size);
+ tl_assert(vge.n_used > 0);
+
+ if (DEBUG_CG)
+ VG_(printf)( "discard_basic_block_info: %p, %llu\n",
+ (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
// Get BB info, remove from table, free BB info. Simple!
- bbInfo = VG_(HT_remove)(instr_info_table, a);
+ bbInfo = VG_(HT_remove)(instr_info_table, (UWord)vge.base[0]);
tl_assert(NULL != bbInfo);
+
VG_(free)(bbInfo);
}
@@ -1192,7 +1477,7 @@ static void cg_pre_clo_init(void)
VG_(details_copyright_author)(
"Copyright (C) 2002-2005, and GNU GPL'd, by Nicholas Nethercote et al.");
VG_(details_bug_reports_to) (VG_BUGS_TO);
- VG_(details_avg_translation_sizeB) ( 155 );
+ VG_(details_avg_translation_sizeB) ( 245 );
VG_(basic_tool_funcs) (cg_post_clo_init,
cg_instrument,