Initial import of fatgrind.HEAD master

author: Stephane Marchesin <marchesin@icps.u-strasbg.fr> 2009-05-04 19:05:59 +0200
committer: Stephane Marchesin <marchesin@icps.u-strasbg.fr> 2009-05-04 19:05:59 +0200
commit: 6e410b3bb6ff51580897431105aae14591cbf7fb (patch)
tree: f8aeba9352710f10cd6b1d5138c8fc3ece91c8c3 /cachegrind
139 files changed, 22211 insertions, 0 deletions
diff --git a/cachegrind/.deps/cachegrind_amd64_linux-cg-amd64.Po b/cachegrind/.deps/cachegrind_amd64_linux-cg-amd64.Po
new file mode 100644
index 0000000..7507d25
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_amd64_linux-cg-amd64.Po
@@ -0,0 +1,26 @@
+cachegrind_amd64_linux-cg-amd64.o: cg-amd64.c cg-x86.c \
+  ../include/pub_tool_basics.h ../VEX/pub/libvex_basictypes.h \
+  /usr/lib/gcc/x86_64-redhat-linux/4.3.2/include/stdarg.h ../config.h \
+  ../include/pub_tool_cpuid.h ../include/pub_tool_libcbase.h \
+  ../include/pub_tool_libcassert.h ../include/pub_tool_libcprint.h \
+  cg_arch.h
+
+cg-x86.c:
+
+../include/pub_tool_basics.h:
+
+../VEX/pub/libvex_basictypes.h:
+
+/usr/lib/gcc/x86_64-redhat-linux/4.3.2/include/stdarg.h:
+
+../config.h:
+
+../include/pub_tool_cpuid.h:
+
+../include/pub_tool_libcbase.h:
+
+../include/pub_tool_libcassert.h:
+
+../include/pub_tool_libcprint.h:
+
+cg_arch.h:
diff --git a/cachegrind/.deps/cachegrind_amd64_linux-cg_main.Po b/cachegrind/.deps/cachegrind_amd64_linux-cg_main.Po
new file mode 100644
index 0000000..3e4503d
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_amd64_linux-cg_main.Po
@@ -0,0 +1,73 @@
+cachegrind_amd64_linux-cg_main.o: cg_main.c ../include/pub_tool_basics.h \
+  ../VEX/pub/libvex_basictypes.h \
+  /usr/lib/gcc/x86_64-redhat-linux/4.3.2/include/stdarg.h ../config.h \
+  ../include/pub_tool_vki.h ../include/vki/vki-linux.h \
+  ../include/vki/vki-posixtypes-amd64-linux.h \
+  ../include/vki/vki-amd64-linux.h ../include/pub_tool_debuginfo.h \
+  ../include/pub_tool_libcbase.h ../include/pub_tool_libcassert.h \
+  ../include/pub_tool_libcfile.h ../include/pub_tool_libcprint.h \
+  ../include/pub_tool_libcproc.h ../include/pub_tool_machine.h \
+  ../include/pub_tool_mallocfree.h ../include/pub_tool_options.h \
+  ../VEX/pub/libvex.h ../VEX/pub/libvex_basictypes.h \
+  ../VEX/pub/libvex_ir.h ../include/pub_tool_oset.h \
+  ../include/pub_tool_tooliface.h ../include/pub_tool_errormgr.h \
+  ../include/pub_tool_execontext.h ../include/pub_tool_xarray.h \
+  ../include/pub_tool_clientstate.h cg_arch.h cg_sim.c cg_branchpred.c
+
+../include/pub_tool_basics.h:
+
+../VEX/pub/libvex_basictypes.h:
+
+/usr/lib/gcc/x86_64-redhat-linux/4.3.2/include/stdarg.h:
+
+../config.h:
+
+../include/pub_tool_vki.h:
+
+../include/vki/vki-linux.h:
+
+../include/vki/vki-posixtypes-amd64-linux.h:
+
+../include/vki/vki-amd64-linux.h:
+
+../include/pub_tool_debuginfo.h:
+
+../include/pub_tool_libcbase.h:
+
+../include/pub_tool_libcassert.h:
+
+../include/pub_tool_libcfile.h:
+
+../include/pub_tool_libcprint.h:
+
+../include/pub_tool_libcproc.h:
+
+../include/pub_tool_machine.h:
+
+../include/pub_tool_mallocfree.h:
+
+../include/pub_tool_options.h:
+
+../VEX/pub/libvex.h:
+
+../VEX/pub/libvex_basictypes.h:
+
+../VEX/pub/libvex_ir.h:
+
+../include/pub_tool_oset.h:
+
+../include/pub_tool_tooliface.h:
+
+../include/pub_tool_errormgr.h:
+
+../include/pub_tool_execontext.h:
+
+../include/pub_tool_xarray.h:
+
+../include/pub_tool_clientstate.h:
+
+cg_arch.h:
+
+cg_sim.c:
+
+cg_branchpred.c:
diff --git a/cachegrind/.deps/cachegrind_ppc32_aix5-cg-ppc32.Po b/cachegrind/.deps/cachegrind_ppc32_aix5-cg-ppc32.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_ppc32_aix5-cg-ppc32.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/.deps/cachegrind_ppc32_aix5-cg_main.Po b/cachegrind/.deps/cachegrind_ppc32_aix5-cg_main.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_ppc32_aix5-cg_main.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/.deps/cachegrind_ppc32_linux-cg-ppc32.Po b/cachegrind/.deps/cachegrind_ppc32_linux-cg-ppc32.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_ppc32_linux-cg-ppc32.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/.deps/cachegrind_ppc32_linux-cg_main.Po b/cachegrind/.deps/cachegrind_ppc32_linux-cg_main.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_ppc32_linux-cg_main.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/.deps/cachegrind_ppc64_aix5-cg-ppc64.Po b/cachegrind/.deps/cachegrind_ppc64_aix5-cg-ppc64.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_ppc64_aix5-cg-ppc64.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/.deps/cachegrind_ppc64_aix5-cg_main.Po b/cachegrind/.deps/cachegrind_ppc64_aix5-cg_main.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_ppc64_aix5-cg_main.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/.deps/cachegrind_ppc64_linux-cg-ppc64.Po b/cachegrind/.deps/cachegrind_ppc64_linux-cg-ppc64.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_ppc64_linux-cg-ppc64.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/.deps/cachegrind_ppc64_linux-cg_main.Po b/cachegrind/.deps/cachegrind_ppc64_linux-cg_main.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_ppc64_linux-cg_main.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/.deps/cachegrind_x86_linux-cg-x86.Po b/cachegrind/.deps/cachegrind_x86_linux-cg-x86.Po
new file mode 100644
index 0000000..aba8da9
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_x86_linux-cg-x86.Po
@@ -0,0 +1,26 @@
+cachegrind_x86_linux-cg-x86.o: cg-x86.c ../include/pub_tool_basics.h \
+  ../VEX/pub/libvex_basictypes.h ../include/pub_tool_basics_asm.h \
+  /ptmp/marchesi/opt/bin/../lib/gcc/i686-pc-linux-gnu/4.3.2/include/stdarg.h \
+  ../config.h ../include/pub_tool_cpuid.h ../include/pub_tool_libcbase.h \
+  ../include/pub_tool_libcassert.h ../include/pub_tool_libcprint.h \
+  cg_arch.h
+
+../include/pub_tool_basics.h:
+
+../VEX/pub/libvex_basictypes.h:
+
+../include/pub_tool_basics_asm.h:
+
+/ptmp/marchesi/opt/bin/../lib/gcc/i686-pc-linux-gnu/4.3.2/include/stdarg.h:
+
+../config.h:
+
+../include/pub_tool_cpuid.h:
+
+../include/pub_tool_libcbase.h:
+
+../include/pub_tool_libcassert.h:
+
+../include/pub_tool_libcprint.h:
+
+cg_arch.h:
diff --git a/cachegrind/.deps/cachegrind_x86_linux-cg_main.Po b/cachegrind/.deps/cachegrind_x86_linux-cg_main.Po
new file mode 100644
index 0000000..cc9ba68
--- /dev/null
+++ b/cachegrind/.deps/cachegrind_x86_linux-cg_main.Po
@@ -0,0 +1,75 @@
+cachegrind_x86_linux-cg_main.o: cg_main.c ../include/pub_tool_basics.h \
+  ../VEX/pub/libvex_basictypes.h ../include/pub_tool_basics_asm.h \
+  /ptmp/marchesi/opt/bin/../lib/gcc/i686-pc-linux-gnu/4.3.2/include/stdarg.h \
+  ../config.h ../include/pub_tool_vki.h ../include/vki/vki-linux.h \
+  ../include/vki/vki-posixtypes-x86-linux.h \
+  ../include/vki/vki-x86-linux.h ../include/pub_tool_debuginfo.h \
+  ../include/pub_tool_libcbase.h ../include/pub_tool_libcassert.h \
+  ../include/pub_tool_libcfile.h ../include/pub_tool_libcprint.h \
+  ../include/pub_tool_libcproc.h ../include/pub_tool_machine.h \
+  ../include/pub_tool_mallocfree.h ../include/pub_tool_options.h \
+  ../VEX/pub/libvex.h ../VEX/pub/libvex_basictypes.h \
+  ../VEX/pub/libvex_ir.h ../include/pub_tool_oset.h \
+  ../include/pub_tool_tooliface.h ../include/pub_tool_errormgr.h \
+  ../include/pub_tool_execontext.h ../include/pub_tool_xarray.h \
+  ../include/pub_tool_clientstate.h cg_arch.h cg_sim.c cg_branchpred.c
+
+../include/pub_tool_basics.h:
+
+../VEX/pub/libvex_basictypes.h:
+
+../include/pub_tool_basics_asm.h:
+
+/ptmp/marchesi/opt/bin/../lib/gcc/i686-pc-linux-gnu/4.3.2/include/stdarg.h:
+
+../config.h:
+
+../include/pub_tool_vki.h:
+
+../include/vki/vki-linux.h:
+
+../include/vki/vki-posixtypes-x86-linux.h:
+
+../include/vki/vki-x86-linux.h:
+
+../include/pub_tool_debuginfo.h:
+
+../include/pub_tool_libcbase.h:
+
+../include/pub_tool_libcassert.h:
+
+../include/pub_tool_libcfile.h:
+
+../include/pub_tool_libcprint.h:
+
+../include/pub_tool_libcproc.h:
+
+../include/pub_tool_machine.h:
+
+../include/pub_tool_mallocfree.h:
+
+../include/pub_tool_options.h:
+
+../VEX/pub/libvex.h:
+
+../VEX/pub/libvex_basictypes.h:
+
+../VEX/pub/libvex_ir.h:
+
+../include/pub_tool_oset.h:
+
+../include/pub_tool_tooliface.h:
+
+../include/pub_tool_errormgr.h:
+
+../include/pub_tool_execontext.h:
+
+../include/pub_tool_xarray.h:
+
+../include/pub_tool_clientstate.h:
+
+cg_arch.h:
+
+cg_sim.c:
+
+cg_branchpred.c:
diff --git a/cachegrind/.deps/cg_merge-cg_merge.Po b/cachegrind/.deps/cg_merge-cg_merge.Po
new file mode 100644
index 0000000..f8120d0
--- /dev/null
+++ b/cachegrind/.deps/cg_merge-cg_merge.Po
@@ -0,0 +1,83 @@
+cg_merge-cg_merge.o: cg_merge.c /usr/include/stdio.h \
+  /usr/include/features.h /usr/include/sys/cdefs.h \
+  /usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
+  /usr/include/gnu/stubs-64.h \
+  /usr/lib/gcc/x86_64-redhat-linux/4.3.2/include/stddef.h \
+  /usr/include/bits/types.h /usr/include/bits/typesizes.h \
+  /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \
+  /usr/lib/gcc/x86_64-redhat-linux/4.3.2/include/stdarg.h \
+  /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \
+  /usr/include/bits/stdio.h /usr/include/stdlib.h \
+  /usr/include/sys/types.h /usr/include/time.h /usr/include/endian.h \
+  /usr/include/bits/endian.h /usr/include/sys/select.h \
+  /usr/include/bits/select.h /usr/include/bits/sigset.h \
+  /usr/include/bits/time.h /usr/include/sys/sysmacros.h \
+  /usr/include/bits/pthreadtypes.h /usr/include/alloca.h \
+  /usr/include/assert.h /usr/include/string.h /usr/include/bits/string.h \
+  /usr/include/bits/string2.h /usr/include/ctype.h
+
+/usr/include/stdio.h:
+
+/usr/include/features.h:
+
+/usr/include/sys/cdefs.h:
+
+/usr/include/bits/wordsize.h:
+
+/usr/include/gnu/stubs.h:
+
+/usr/include/gnu/stubs-64.h:
+
+/usr/lib/gcc/x86_64-redhat-linux/4.3.2/include/stddef.h:
+
+/usr/include/bits/types.h:
+
+/usr/include/bits/typesizes.h:
+
+/usr/include/libio.h:
+
+/usr/include/_G_config.h:
+
+/usr/include/wchar.h:
+
+/usr/lib/gcc/x86_64-redhat-linux/4.3.2/include/stdarg.h:
+
+/usr/include/bits/stdio_lim.h:
+
+/usr/include/bits/sys_errlist.h:
+
+/usr/include/bits/stdio.h:
+
+/usr/include/stdlib.h:
+
+/usr/include/sys/types.h:
+
+/usr/include/time.h:
+
+/usr/include/endian.h:
+
+/usr/include/bits/endian.h:
+
+/usr/include/sys/select.h:
+
+/usr/include/bits/select.h:
+
+/usr/include/bits/sigset.h:
+
+/usr/include/bits/time.h:
+
+/usr/include/sys/sysmacros.h:
+
+/usr/include/bits/pthreadtypes.h:
+
+/usr/include/alloca.h:
+
+/usr/include/assert.h:
+
+/usr/include/string.h:
+
+/usr/include/bits/string.h:
+
+/usr/include/bits/string2.h:
+
+/usr/include/ctype.h:
diff --git a/cachegrind/.svn/dir-prop-base b/cachegrind/.svn/dir-prop-base
new file mode 100644
index 0000000..6095617
--- /dev/null
+++ b/cachegrind/.svn/dir-prop-base
@@ -0,0 +1,15 @@
+K 10
+svn:ignore
+V 143
+cachegrind-amd64-linux
+cachegrind-ppc32-linux
+cachegrind-ppc64-linux
+cachegrind-x86-linux
+cg_annotate
+cg_merge
+.deps
+Makefile
+Makefile.in
+*.so
+
+END
diff --git a/cachegrind/.svn/entries b/cachegrind/.svn/entries
new file mode 100644
index 0000000..64850c2
--- /dev/null
+++ b/cachegrind/.svn/entries
@@ -0,0 +1,172 @@
+8
+
+dir
+9703
+svn://svn.valgrind.org/valgrind/trunk/cachegrind
+svn://svn.valgrind.org/valgrind
+
+
+
+2009-04-24T20:17:07.643509Z
+9611
+njn
+has-props
+
+svn:special svn:externals svn:needs-lock
+
+
+
+
+
+
+
+
+
+
+
+a5019735-40e9-0310-863c-91ae7b9d1cf9
+
+cg-ppc32.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+994af6e3f51f39b80708828d6fb8d01e
+2009-03-12T00:07:35.482249Z
+9368
+njn
+
+tests
+dir
+
+cg_branchpred.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+1abafa6efeba8b09a2e907de58a799d6
+2009-03-10T22:02:09.669944Z
+9344
+njn
+
+cg_sim.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+9fdecd4227cf74c48bbfdb518e776294
+2009-03-10T22:02:09.669944Z
+9344
+njn
+has-props
+
+cg-amd64.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+5fe1b65c49490b3d699188eebc351888
+2009-03-10T22:02:09.669944Z
+9344
+njn
+
+cg-ppc64.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+c079631909d52a52b889b16a0b6bc3f8
+2009-03-12T00:07:35.482249Z
+9368
+njn
+
+cg_annotate.in
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+e557d2a59de2cfe28e5acb6a4685e67d
+2007-11-23T01:41:32.983154Z
+7202
+njn
+has-props
+
+docs
+dir
+
+Makefile.am
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+efb0550ce42ea1a7dcf91e087e794d48
+2009-01-22T21:56:32.234907Z
+9031
+njn
+has-props
+
+cg_merge.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+931acad018c70d7bc42be9fad49a129a
+2009-03-10T22:02:09.669944Z
+9344
+njn
+
+cg_arch.h
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+13a93cfa469ead0ecddde5c7b6760732
+2009-03-10T22:02:09.669944Z
+9344
+njn
+has-props
+
+cg-x86.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+a0439fa448f20a0dd71cd5e470599e3d
+2009-03-12T00:06:45.156153Z
+9367
+njn
+has-props
+
+cg_main.c
+file
+
+
+
+
+2009-04-30T16:44:00.000000Z
+92d236204237e606eaacbaf1010724ef
+2009-03-15T23:25:38.213170Z
+9416
+njn
+has-props
+
diff --git a/cachegrind/.svn/format b/cachegrind/.svn/format
new file mode 100644
index 0000000..45a4fb7
--- /dev/null
+++ b/cachegrind/.svn/format
@@ -0,0 +1 @@
+8
diff --git a/cachegrind/.svn/prop-base/Makefile.am.svn-base b/cachegrind/.svn/prop-base/Makefile.am.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/.svn/prop-base/Makefile.am.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/.svn/prop-base/cg-x86.c.svn-base b/cachegrind/.svn/prop-base/cg-x86.c.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/.svn/prop-base/cg-x86.c.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/.svn/prop-base/cg_annotate.in.svn-base b/cachegrind/.svn/prop-base/cg_annotate.in.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/.svn/prop-base/cg_annotate.in.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/.svn/prop-base/cg_arch.h.svn-base b/cachegrind/.svn/prop-base/cg_arch.h.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/.svn/prop-base/cg_arch.h.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/.svn/prop-base/cg_main.c.svn-base b/cachegrind/.svn/prop-base/cg_main.c.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/.svn/prop-base/cg_main.c.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/.svn/prop-base/cg_sim.c.svn-base b/cachegrind/.svn/prop-base/cg_sim.c.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/.svn/prop-base/cg_sim.c.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/.svn/text-base/Makefile.am.svn-base b/cachegrind/.svn/text-base/Makefile.am.svn-base
new file mode 100644
index 0000000..eac2825
--- /dev/null
+++ b/cachegrind/.svn/text-base/Makefile.am.svn-base
@@ -0,0 +1,82 @@
+include $(top_srcdir)/Makefile.tool.am
+
+bin_SCRIPTS = cg_annotate
+
+noinst_HEADERS = cg_arch.h cg_sim.c cg_branchpred.c
+
+noinst_PROGRAMS = 
+if VGCONF_PLATFORMS_INCLUDE_X86_LINUX
+noinst_PROGRAMS += cachegrind-x86-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_AMD64_LINUX
+noinst_PROGRAMS += cachegrind-amd64-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_PPC32_LINUX
+noinst_PROGRAMS += cachegrind-ppc32-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_PPC64_LINUX
+noinst_PROGRAMS += cachegrind-ppc64-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_PPC32_AIX5
+noinst_PROGRAMS += cachegrind-ppc32-aix5
+endif
+if VGCONF_PLATFORMS_INCLUDE_PPC64_AIX5
+noinst_PROGRAMS += cachegrind-ppc64-aix5
+endif
+
+# Build cg_merge for the primary target only.
+bin_PROGRAMS = cg_merge
+cg_merge_SOURCES = cg_merge.c
+cg_merge_CPPFLAGS  = $(AM_CPPFLAGS_PRI)
+cg_merge_CFLAGS    = $(AM_CFLAGS_PRI)
+cg_merge_CCASFLAGS = $(AM_CCASFLAGS_PRI)
+cg_merge_LDFLAGS   = $(AM_CFLAGS_PRI)
+
+
+CACHEGRIND_SOURCES_COMMON = cg_main.c
+CACHEGRIND_SOURCES_X86 = cg-x86.c
+CACHEGRIND_SOURCES_AMD64 = cg-amd64.c
+CACHEGRIND_SOURCES_PPC32 = cg-ppc32.c
+CACHEGRIND_SOURCES_PPC64 = cg-ppc64.c
+
+cachegrind_x86_linux_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_X86)
+cachegrind_x86_linux_CPPFLAGS     = $(AM_CPPFLAGS_X86_LINUX)
+cachegrind_x86_linux_CFLAGS       = $(AM_CFLAGS_X86_LINUX)
+cachegrind_x86_linux_DEPENDENCIES = $(COREGRIND_LIBS_X86_LINUX)
+cachegrind_x86_linux_LDADD        = $(TOOL_LDADD_X86_LINUX)
+cachegrind_x86_linux_LDFLAGS      = $(TOOL_LDFLAGS_X86_LINUX)
+
+cachegrind_amd64_linux_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_AMD64)
+cachegrind_amd64_linux_CPPFLAGS     = $(AM_CPPFLAGS_AMD64_LINUX)
+cachegrind_amd64_linux_CFLAGS       = $(AM_CFLAGS_AMD64_LINUX)
+cachegrind_amd64_linux_DEPENDENCIES = $(COREGRIND_LIBS_AMD64_LINUX)
+cachegrind_amd64_linux_LDADD        = $(TOOL_LDADD_AMD64_LINUX)
+cachegrind_amd64_linux_LDFLAGS      = $(TOOL_LDFLAGS_AMD64_LINUX)
+
+cachegrind_ppc32_linux_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC32)
+cachegrind_ppc32_linux_CPPFLAGS     = $(AM_CPPFLAGS_PPC32_LINUX)
+cachegrind_ppc32_linux_CFLAGS       = $(AM_CFLAGS_PPC32_LINUX)
+cachegrind_ppc32_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_LINUX)
+cachegrind_ppc32_linux_LDADD        = $(TOOL_LDADD_PPC32_LINUX)
+cachegrind_ppc32_linux_LDFLAGS      = $(TOOL_LDFLAGS_PPC32_LINUX)
+
+cachegrind_ppc64_linux_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC64)
+cachegrind_ppc64_linux_CPPFLAGS     = $(AM_CPPFLAGS_PPC64_LINUX)
+cachegrind_ppc64_linux_CFLAGS       = $(AM_CFLAGS_PPC64_LINUX)
+cachegrind_ppc64_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_LINUX)
+cachegrind_ppc64_linux_LDADD        = $(TOOL_LDADD_PPC64_LINUX)
+cachegrind_ppc64_linux_LDFLAGS      = $(TOOL_LDFLAGS_PPC64_LINUX)
+
+cachegrind_ppc32_aix5_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC32)
+cachegrind_ppc32_aix5_CPPFLAGS     = $(AM_CPPFLAGS_PPC32_AIX5)
+cachegrind_ppc32_aix5_CFLAGS       = $(AM_CFLAGS_PPC32_AIX5)
+cachegrind_ppc32_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_AIX5)
+cachegrind_ppc32_aix5_LDADD        = $(TOOL_LDADD_PPC32_AIX5)
+cachegrind_ppc32_aix5_LDFLAGS      = $(TOOL_LDFLAGS_PPC32_AIX5)
+
+cachegrind_ppc64_aix5_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC64)
+cachegrind_ppc64_aix5_CPPFLAGS     = $(AM_CPPFLAGS_PPC64_AIX5)
+cachegrind_ppc64_aix5_CFLAGS       = $(AM_CFLAGS_PPC64_AIX5)
+cachegrind_ppc64_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_AIX5)
+cachegrind_ppc64_aix5_LDADD        = $(TOOL_LDADD_PPC64_AIX5)
+cachegrind_ppc64_aix5_LDFLAGS      = $(TOOL_LDFLAGS_PPC64_AIX5)
diff --git a/cachegrind/.svn/text-base/cg-amd64.c.svn-base b/cachegrind/.svn/text-base/cg-amd64.c.svn-base
new file mode 100644
index 0000000..9b0c653
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg-amd64.c.svn-base
@@ -0,0 +1,35 @@
+
+/*--------------------------------------------------------------------*/
+/*--- AMD64-specific definitions.                       cg-amd64.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "cg-x86.c"
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/.svn/text-base/cg-ppc32.c.svn-base b/cachegrind/.svn/text-base/cg-ppc32.c.svn-base
new file mode 100644
index 0000000..570e208
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg-ppc32.c.svn-base
@@ -0,0 +1,64 @@
+
+/*--------------------------------------------------------------------*/
+/*--- PPC32-specific definitions.                       cg-ppc32.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2005-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcprint.h"
+
+#include "cg_arch.h"
+
+void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
+                           Bool all_caches_clo_defined)
+{
+   // No detection is attempted on PPC32: the three out-parameters are
+   // always overwritten with these fixed defaults.  NOTE(review): the
+   // initialiser order looks like { size, assoc, line size } -- confirm
+   // against the cache_t definition in cg_arch.h.
+   *I1c = (cache_t) {  65536, 2, 64 };
+   *D1c = (cache_t) {  65536, 2, 64 };
+   *L2c = (cache_t) { 262144, 8, 64 };
+
+   // Warn when the caller reports that the command line did not define
+   // every cache.  The wording deliberately differs from the x86/AMD64
+   // auto-detection-failure message so the regression suite can filter
+   // out this unimportant one while still showing the x86/AMD64 one.
+   //
+   // If you change this message, please update
+   // cachegrind/tests/filter_stderr!
+   if (!all_caches_clo_defined) {
+      VG_DMSG("Warning: Cannot auto-detect cache config on PPC32, using one "
+              "or more defaults ");
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/.svn/text-base/cg-ppc64.c.svn-base b/cachegrind/.svn/text-base/cg-ppc64.c.svn-base
new file mode 100644
index 0000000..beb1f34
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg-ppc64.c.svn-base
@@ -0,0 +1,64 @@
+
+/*--------------------------------------------------------------------*/
+/*--- PPC64-specific definitions.                       cg-ppc64.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2005-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcprint.h"
+
+#include "cg_arch.h"
+
+void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
+                           Bool all_caches_clo_defined)
+{
+   // No detection is attempted on PPC64: the three out-parameters are
+   // always overwritten with these fixed defaults.  NOTE(review): the
+   // initialiser order looks like { size, assoc, line size } -- confirm
+   // against the cache_t definition in cg_arch.h.
+   *I1c = (cache_t) {  65536, 2, 64 };
+   *D1c = (cache_t) {  65536, 2, 64 };
+   *L2c = (cache_t) { 262144, 8, 64 };
+
+   // Warn when the caller reports that the command line did not define
+   // every cache.  The wording deliberately differs from the x86/AMD64
+   // auto-detection-failure message so the regression suite can filter
+   // out this unimportant one while still showing the x86/AMD64 one.
+   //
+   // If you change this message, please update
+   // cachegrind/tests/filter_stderr!
+   if (!all_caches_clo_defined) {
+      VG_DMSG("Warning: Cannot auto-detect cache config on PPC64, using one "
+              "or more defaults ");
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/.svn/text-base/cg-x86.c.svn-base b/cachegrind/.svn/text-base/cg-x86.c.svn-base
new file mode 100644
index 0000000..be5eb82
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg-x86.c.svn-base
@@ -0,0 +1,352 @@
+
+/*--------------------------------------------------------------------*/
+/*--- x86-specific (and AMD64-specific) definitions.      cg-x86.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_cpuid.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcprint.h"
+
+#include "cg_arch.h"
+
+// All CPUID info taken from sandpile.org/a32/cpuid.htm
+// Probably only works for Intel and AMD chips, and probably only for some of
+// them.
+
+static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
+{  /* Two-line warning: the reported trace cache, then the I-cache simulated. */
+   VG_DMSG("warning: Pentium 4 with %d KB micro-op instruction trace cache", 
+           actual_size);
+   VG_DMSG("         Simulating a %d KB I-cache with %d B lines", 
+           used_size, line_size);
+}
+
+/* Intel method is truly wretched: CPUID(2) returns 16 descriptor bytes, and
+ * each one indexes a pre-defined configuration for some part of the memory
+ * hierarchy (Intel Processor Identification, App Note 485).  Sizes are set
+ * in KB.  Returns 0 on success; returns -1, with the caches untouched, if
+ * 'level' is below 2 or CPUID(2) returns AL != 1. */
+static
+Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   Int cpuid1_eax;
+   Int cpuid1_ignore;
+   Int family;
+   Int model;
+   UChar info[16];
+   Int   i, trials;
+   Bool  L2_found = False;
+
+   if (level < 2) {
+      VG_DMSG("warning: CPUID level < 2 for Intel processor (%d)", level);
+      return -1;
+   }
+
+   /* family/model needed to distinguish code reuse (currently 0x49) */
+   VG_(cpuid)(1, &cpuid1_eax, &cpuid1_ignore,
+              &cpuid1_ignore, &cpuid1_ignore);
+   family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
+   model =  (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
+
+   VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4],
+                 (Int*)&info[8], (Int*)&info[12]);
+   trials  = info[0] - 1;   /* AL register - bits 0..7 of %eax */
+   info[0] = 0x0;           /* reset AL */
+
+   if (0 != trials) {
+      VG_DMSG("warning: non-zero CPUID trials for Intel processor (%d)",
+              trials);
+      return -1;
+   }
+
+   for (i = 0; i < 16; i++) {
+      switch (info[i]) {
+
+      case 0x0:       /* ignore zeros */
+          break;
+
+      /* TLB info, ignore */
+      case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
+      case 0x4f: case 0x50: case 0x51: case 0x52:
+      case 0x56: case 0x57: case 0x59:
+      case 0x5b: case 0x5c: case 0x5d:
+      case 0xb0: case 0xb1:
+      case 0xb3: case 0xb4: case 0xba: case 0xc0:
+          break;
+
+      case 0x06: *I1c = (cache_t) {  8, 4, 32 }; break;
+      case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
+      case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
+
+      case 0x0a: *D1c = (cache_t) {  8, 2, 32 }; break;
+      case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
+      case 0x0e: *D1c = (cache_t) { 24, 6, 64 }; break;
+      case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
+
+      /* IA-64 info -- panic! */
+      case 0x10: case 0x15: case 0x1a:
+      case 0x88: case 0x89: case 0x8a: case 0x8d:
+      case 0x90: case 0x96: case 0x9b:
+         VG_(tool_panic)("IA-64 cache detected?!");
+
+      case 0x22: case 0x23: case 0x25: case 0x29:
+      case 0x46: case 0x47: case 0x4a: case 0x4b: case 0x4c: case 0x4d:
+          VG_DMSG("warning: L3 cache detected but ignored");
+          break;
+
+      /* These are sectored, whatever that means */
+      case 0x39: *L2c = (cache_t) {  128, 4, 64 }; L2_found = True; break;
+      case 0x3c: *L2c = (cache_t) {  256, 4, 64 }; L2_found = True; break;
+
+      /* If a P6 core, this means "no L2 cache".
+         If a P4 core, this means "no L3 cache".
+         We don't know what core it is, so don't issue a warning.  To detect
+         a missing L2 cache, we use 'L2_found'. */
+      case 0x40:
+          break;
+
+      case 0x41: *L2c = (cache_t) {  128, 4, 32 }; L2_found = True; break;
+      case 0x42: *L2c = (cache_t) {  256, 4, 32 }; L2_found = True; break;
+      case 0x43: *L2c = (cache_t) {  512, 4, 32 }; L2_found = True; break;
+      case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
+      case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
+      case 0x48: *L2c = (cache_t) { 3072,12, 64 }; L2_found = True; break;
+      case 0x49:
+          if ((family == 15) && (model == 6)) {
+              /* On Xeon MP (family F, model 6), this is for L3 */
+              VG_DMSG("warning: L3 cache detected but ignored");
+          } else {
+              *L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
+          }
+          break;
+      case 0x4e: *L2c = (cache_t) { 6144, 24, 64 }; L2_found = True; break;
+
+      /* These are sectored, whatever that means */
+      case 0x60: *D1c = (cache_t) { 16, 8, 64 };  break;      /* sectored */
+      case 0x66: *D1c = (cache_t) {  8, 4, 64 };  break;      /* sectored */
+      case 0x67: *D1c = (cache_t) { 16, 4, 64 };  break;      /* sectored */
+      case 0x68: *D1c = (cache_t) { 32, 4, 64 };  break;      /* sectored */
+
+      /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
+       * conversion to byte size is a total guess;  treat the 12K and 16K
+       * cases the same since the cache byte size must be a power of two for
+       * everything to work!.  Also guessing 32 bytes for the line size...
+       */
+      case 0x70:    /* 12K micro-ops, 8-way */
+         *I1c = (cache_t) { 16, 8, 32 };
+         micro_ops_warn(12, 16, 32);
+         break;
+      case 0x71:    /* 16K micro-ops, 8-way */
+         *I1c = (cache_t) { 16, 8, 32 };
+         micro_ops_warn(16, 16, 32);
+         break;
+      case 0x72:    /* 32K micro-ops, 8-way */
+         *I1c = (cache_t) { 32, 8, 32 };
+         micro_ops_warn(32, 32, 32);
+         break;
+
+      /* These are sectored, whatever that means */
+      case 0x79: *L2c = (cache_t) {  128, 8,  64 }; L2_found = True;  break;
+      case 0x7a: *L2c = (cache_t) {  256, 8,  64 }; L2_found = True;  break;
+      case 0x7b: *L2c = (cache_t) {  512, 8,  64 }; L2_found = True;  break;
+      case 0x7c: *L2c = (cache_t) { 1024, 8,  64 }; L2_found = True;  break;
+      case 0x7d: *L2c = (cache_t) { 2048, 8,  64 }; L2_found = True;  break;
+      case 0x7e: *L2c = (cache_t) {  256, 8, 128 }; L2_found = True;  break;
+
+      case 0x7f: *L2c = (cache_t) {  512, 2, 64 };  L2_found = True;  break;
+      case 0x80: *L2c = (cache_t) {  512, 8, 64 };  L2_found = True;  break;
+
+      case 0x81: *L2c = (cache_t) {  128, 8, 32 };  L2_found = True;  break;
+      case 0x82: *L2c = (cache_t) {  256, 8, 32 };  L2_found = True;  break;
+      case 0x83: *L2c = (cache_t) {  512, 8, 32 };  L2_found = True;  break;
+      case 0x84: *L2c = (cache_t) { 1024, 8, 32 };  L2_found = True;  break;
+      case 0x85: *L2c = (cache_t) { 2048, 8, 32 };  L2_found = True;  break;
+      case 0x86: *L2c = (cache_t) {  512, 4, 64 };  L2_found = True;  break;
+      case 0x87: *L2c = (cache_t) { 1024, 8, 64 };  L2_found = True;  break;
+
+      /* Ignore prefetch information */
+      case 0xf0: case 0xf1:
+         break;
+
+      default:
+         VG_DMSG("warning: Unknown Intel cache config value (0x%x), ignoring",
+                 info[i]);
+         break;
+      }
+   }
+
+   if (!L2_found)
+      VG_DMSG("warning: L2 cache not installed, ignore L2 results.");
+
+   return 0;
+}
+
+/* AMD method is straightforward, just extract appropriate bits from the
+ * result registers.
+ *
+ * Bits, for D1 and I1:
+ *  31..24  data L1 cache size in KBs    
+ *  23..16  data L1 cache associativity (FFh=full)    
+ *  15.. 8  data L1 cache lines per tag    
+ *   7.. 0  data L1 cache line size in bytes
+ *
+ * Bits, for L2:
+ *  31..16  unified L2 cache size in KBs
+ *  15..12  unified L2 cache associativity (0=off, FFh=full)
+ *  11.. 8  unified L2 cache lines per tag    
+ *   7.. 0  unified L2 cache line size in bytes
+ *
+ * #3  The AMD K7 processor's L2 cache must be configured prior to relying 
+ *     upon this information. (Whatever that means -- njn)
+ *
+ * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
+ * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
+ * so we detect that.
+ * 
+ * Returns 0 on success, non-zero on failure.
+ */
+static
+Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   UInt max_ext_leaf, cpu_sig, unused;
+   UInt l1i_reg, l1d_reg, l2_reg;
+
+   /* The cache registers live in extended leaves 0x80000005 and 0x80000006;
+      give up unless the CPU reports extended leaves at least that far. */
+   VG_(cpuid)(0x80000000, &max_ext_leaf, &unused, &unused, &unused);
+   if (max_ext_leaf < 0x80000006 || 0 == (max_ext_leaf & 0x80000000)) {
+      VG_DMSG("warning: ext_level < 0x80000006 for AMD processor (0x%x)",
+              max_ext_leaf);
+      return -1;
+   }
+
+   VG_(cpuid)(0x80000005, &unused,  &unused, &l1d_reg, &l1i_reg);
+   VG_(cpuid)(0x80000006, &unused,  &unused, &l2_reg,  &unused);
+   VG_(cpuid)(0x1,        &cpu_sig, &unused, &unused,  &unused);
+
+   /* Check for Duron bug: overwrite the size field (upper 16 bits, in KB)
+      with 64 and keep the lower 16 bits as reported. */
+   if (0x630 == cpu_sig) {
+      VG_DMSG("warning: Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
+      l2_reg = (l2_reg & 0xffff) | (64 << 16);
+   }
+
+   /* L1 layout: size in bits 31..24, associativity 23..16, line size 7..0 */
+   D1c->size      = (l1d_reg >> 24) & 0xff;
+   D1c->assoc     = (l1d_reg >> 16) & 0xff;
+   D1c->line_size =  l1d_reg        & 0xff;
+   I1c->size      = (l1i_reg >> 24) & 0xff;
+   I1c->assoc     = (l1i_reg >> 16) & 0xff;
+   I1c->line_size =  l1i_reg        & 0xff;
+
+   /* Nb: different bits used for L2 (size 31..16, associativity 15..12) */
+   L2c->size      = (l2_reg >> 16) & 0xffff;
+   L2c->assoc     = (l2_reg >> 12) & 0xf;
+   L2c->line_size =  l2_reg        & 0xff;
+   return 0;
+}
+
+/* Fills I1c/D1c/L2c (sizes in bytes) from CPUID.  Returns 0 on success; on
+   failure returns non-zero and leaves all three caches unmodified. */
+static
+Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   Int  level, ret;
+   Char vendor_id[13];
+
+   if (!VG_(has_cpuid)()) {
+      VG_DMSG("CPUID instruction not supported");
+      return -1;
+   }
+
+   VG_(cpuid)(0, &level, (int*)&vendor_id[0],
+              (int*)&vendor_id[8], (int*)&vendor_id[4]);
+   vendor_id[12] = '\0';
+   if (0 == level) {
+      VG_DMSG("CPUID level is 0, early Pentium?");
+      return -1;
+   }
+
+   /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
+   if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
+      ret = Intel_cache_info(level, I1c, D1c, L2c);
+   } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
+      ret = AMD_cache_info(I1c, D1c, L2c);
+   } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
+      /* Total kludge.  Pretend to be a VIA Nehemiah. */
+      D1c->size      = 64;
+      D1c->assoc     = 16;
+      D1c->line_size = 16;
+      I1c->size      = 64;
+      I1c->assoc     = 4;
+      I1c->line_size = 16;
+      L2c->size      = 64;
+      L2c->assoc     = 16;
+      L2c->line_size = 16;
+      ret = 0;
+   } else {
+      VG_DMSG("CPU vendor ID not recognised (%s)", vendor_id);
+      return -1;
+   }
+
+   /* On failure the caches still hold the caller's values: don't scale them */
+   if (ret != 0)
+      return ret;
+   /* Successful!  Convert sizes from KB to bytes */
+   I1c->size *= 1024;
+   D1c->size *= 1024;
+   L2c->size *= 1024;
+   return 0;
+}
+
+
+void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
+                           Bool all_caches_clo_defined)
+{
+   Bool detect_failed;
+
+   // Start from the built-in defaults.
+   *I1c = (cache_t) {  65536, 2, 64 };
+   *D1c = (cache_t) {  65536, 2, 64 };
+   *L2c = (cache_t) { 262144, 8, 64 };
+
+   // Let CPUID override them; a non-zero result means detection failed.
+   detect_failed = (0 != get_caches_from_CPUID(I1c, D1c, L2c));
+
+   // Only warn when the command line did not specify the config completely.
+   if (detect_failed && !all_caches_clo_defined) {
+      VG_DMSG("Warning: Couldn't auto-detect cache config, using one "
+              "or more defaults ");
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/.svn/text-base/cg_annotate.in.svn-base b/cachegrind/.svn/text-base/cg_annotate.in.svn-base
new file mode 100644
index 0000000..31e9506
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg_annotate.in.svn-base
@@ -0,0 +1,905 @@
+#! @PERL@
+
+##--------------------------------------------------------------------##
+##--- Cachegrind's annotator.                       cg_annotate.in ---##
+##--------------------------------------------------------------------##
+
+#  This file is part of Cachegrind, a Valgrind tool for cache
+#  profiling programs.
+#
+#  Copyright (C) 2002-2005 Nicholas Nethercote
+#     njn@valgrind.org
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License as
+#  published by the Free Software Foundation; either version 2 of the
+#  License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+#  02111-1307, USA.
+#
+#  The GNU General Public License is contained in the file COPYING.
+
+#----------------------------------------------------------------------------
+# The file format is simple, basically printing the cost centre for every
+# source line, grouped by files and functions.  The details are in
+# Cachegrind's manual.
+
+#----------------------------------------------------------------------------
+# Performance improvements record, using cachegrind.out for cacheprof, doing no
+# source annotation (irrelevant ones removed):
+#                                                               user time
+# 1. turned off warnings in add_hash_a_to_b()                   3.81 --> 3.48s
+#    [now add_array_a_to_b()]
+# 6. make line_to_CC() return a ref instead of a hash           3.01 --> 2.77s
+#
+#10. changed file format to avoid file/fn name repetition       2.40s
+#    (not sure why higher;  maybe due to new '.' entries?)
+#11. changed file format to drop unnecessary end-line "."s      2.36s
+#    (shrunk file by about 37%)
+#12. switched from hash CCs to array CCs                        1.61s
+#13. only adding b[i] to a[i] if b[i] defined (was doing it if
+#    either a[i] or b[i] was defined, but if b[i] was undefined
+#    it just added 0)                                           1.48s
+#14. Stopped converting "." entries to undef and then back      1.16s
+#15. Using foreach $i (x..y) instead of for ($i = 0...) in
+#    add_array_a_to_b()                                         1.11s
+#
+# Auto-annotating primes:
+#16. Finding count lengths by int((length-1)/3), not by
+#    commifying (halves the number of commify calls)            1.68s --> 1.47s
+
+use warnings;
+use strict;
+
+#----------------------------------------------------------------------------
+# Overview: the running example in the comments is for:
+#   - events = A,B,C,D
+#   - --show=C,A,D
+#   - --sort=D,C
+#----------------------------------------------------------------------------
+
+#----------------------------------------------------------------------------
+# Global variables, main data structures
+#----------------------------------------------------------------------------
+# CCs are arrays, the counts corresponding to @events, with 'undef'
+# representing '.'.  This makes things fast (faster than using hashes for CCs)
+# but we have to use @sort_order and @show_order below to handle the --sort and
+# --show options, which is a bit tricky.
+#----------------------------------------------------------------------------
+
+# Total counts for summary (an array reference).
+my $summary_CC;
+
+# Totals for each function, for overall summary.
+# hash(filename:fn_name => CC array)
+my %fn_totals;
+
+# Individual CCs, organised by filename and line_num for easy annotation.
+# hash(filename => hash(line_num => CC array))
+my %all_ind_CCs;
+
+# Files chosen for annotation on the command line.
+# key = filename exactly as given on the command line, value = 1
+my %user_ann_files;
+
+# Generic description string.
+my $desc = "";
+
+# Command line of profiled program.
+my $cmd;
+
+# Events in input file, eg. (A,B,C,D)
+my @events;
+
+# Events to show, from command line, eg. (C,A,D)
+my @show_events;
+
+# Map from @show_events indices to @events indices, eg. (2,0,3).  Gives the
+# order in which we must traverse @events in order to show the @show_events,
+# eg. (@events[$show_order[0]], @events[$show_order[1]]...) = @show_events.
+# (Might help to think of it like a hash (0 => 2, 1 => 0, 2 => 3).)
+my @show_order;
+
+# Print out the function totals sorted by these events, eg. (D,C).
+my @sort_events;
+
+# Map from @sort_events indices to @events indices, eg. (3,2).  Same idea as
+# for @show_order.
+my @sort_order;
+
+# Thresholds, one for each sort event (or default to 1 if no sort events
+# specified).  We print out functions and do auto-annotations until we've
+# handled this proportion of all the events thresholded.
+my @thresholds;
+
+my $default_threshold = 99;
+
+my $single_threshold  = $default_threshold;
+
+# If on, automatically annotates all files that are involved in getting over
+# all the threshold counts.
+my $auto_annotate = 0;
+
+# Number of lines to show around each annotated line.
+my $context = 8;
+
+# Directories in which to look for annotation files.
+my @include_dirs = ("");
+
+# Input file name
+my $input_file = undef;
+
+# Version number
+my $version = "@VERSION@";
+
+# Usage message.
+my $usage = <<END
+usage: cg_annotate [options] output-file [source-files]
+
+  options for the user, with defaults in [ ], are:
+    -h --help             show this message
+    -v --version          show version
+    --show=A,B,C          only show figures for events A,B,C [all]
+    --sort=A,B,C          sort columns by events A,B,C [event column order]
+    --threshold=<0--100>  percentage of counts (of primary sort event) we
+                          are interested in [$default_threshold%]
+    --auto=yes|no         annotate all source files containing functions
+                          that helped reach the event count threshold [no]
+    --context=N           print N lines of context before and after
+                          annotated lines [8]
+    -I<d> --include=<d>   add <d> to list of directories to search for 
+                          source files
+
+  cg_annotate is Copyright (C) 2002-2007 Nicholas Nethercote.
+  and licensed under the GNU General Public License, version 2.
+  Bug reports, feedback, admiration, abuse, etc, to: njn\@valgrind.org.
+                                                
+END
+;
+
+# Used in various places of output.
+my $fancy = '-' x 80 . "\n";
+
+#-----------------------------------------------------------------------------
+# Argument and option handling
+#-----------------------------------------------------------------------------
+sub process_cmd_line() 
+{
+    # Parse @ARGV into the option globals, $input_file and %user_ann_files.
+    for my $arg (@ARGV) { 
+        # Option handling
+        if ($arg =~ /^-/) {
+
+            # --version
+            if ($arg =~ /^-v$|^--version$/) {
+                die("cg_annotate-$version\n");
+
+            # --show=A,B,C
+            } elsif ($arg =~ /^--show=(.*)$/) {
+                @show_events = split(/,/, $1);
+
+            # --sort=A,B,C
+            #   Nb: You can specify thresholds individually, eg.
+            #   --sort=A:99,B:95,C:90.  These will override any --threshold
+            #   argument.
+            } elsif ($arg =~ /^--sort=(.*)$/) {
+                @sort_events = split(/,/, $1);
+                my $th_specified = 0;
+                foreach my $i (0 .. scalar @sort_events - 1) {
+                    if ($sort_events[$i] =~ /.*:([\d\.]+)%?$/) {
+                        my $th = $1;
+                        ($th >= 0 && $th <= 100) or die($usage);
+                        $sort_events[$i] =~ s/:.*//;
+                        $thresholds[$i] = $th;
+                        $th_specified = 1;
+                    } else {
+                        $thresholds[$i] = 0;
+                    }
+                }
+                if (not $th_specified) {
+                    @thresholds = ();   # filled in later by read_input_file()
+                }
+
+            # --threshold=X (tolerates a trailing '%')
+            } elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) {
+                $single_threshold = $1;
+                ($1 >= 0 && $1 <= 100) or die($usage);
+
+            # --auto=yes|no
+            } elsif ($arg =~ /^--auto=yes$/) {
+                $auto_annotate = 1;
+            } elsif ($arg =~ /^--auto=no$/) {
+                $auto_annotate = 0;
+
+            # --context=N
+            } elsif ($arg =~ /^--context=([\d\.]+)$/) {
+                $context = $1;
+                if ($context < 0) {   # cannot fire: the pattern admits no '-'
+                    die($usage);
+                }
+
+            # We don't handle "-I name" -- there can be no space.
+            } elsif ($arg =~ /^-I$/) {
+                die("Sorry, no space is allowed after a -I flag\n");
+            # --include=<d> / -I<d>: adds one directory (the value is not split
+            # on commas).  Allow -I=name for backwards compatibility.
+            } elsif ($arg =~ /^(-I=|-I|--include=)(.*)$/) {
+                my $inc = $2;
+                $inc =~ s|/$||;         # trim trailing '/'
+                push(@include_dirs, "$inc/");
+
+            } else {            # -h and --help fall under this case
+                die($usage);
+            }
+
+        # Argument handling -- annotation file checking and selection.
+        # Stick filenames into a hash for quick 'n easy lookup throughout.
+        } else {
+            if (not defined $input_file) {
+                # First non-option argument is the output file.
+                $input_file = $arg;
+            } else {
+                # Subsequent non-option arguments are source files.
+                my $readable = 0;
+                foreach my $include_dir (@include_dirs) {
+                    if (-r $include_dir . $arg) {
+                        $readable = 1;
+                    }
+                }
+                $readable or die("File $arg not found in any of: @include_dirs\n");
+                $user_ann_files{$arg} = 1;
+            }
+        }
+    }
+
+    # Must have chosen an input file
+    if (not defined $input_file) {
+        die($usage);
+    }
+}
+
+#-----------------------------------------------------------------------------
+# Reading of input file
+#-----------------------------------------------------------------------------
+sub max ($$)      # larger of two numbers (the second if they compare equal)
+{
+    return $_[0] if ($_[0] > $_[1]);
+    return $_[1];
+}
+
+# Add the counts in the first array to the second, element by element; any
+# '.' entries in the first are ignored.  Two tricky things:
+# 1. If $dst->[$idx] is undefined, it defaults to 0 which is what we want; we
+#    turn off warnings to allow this.  This makes things about 10% faster
+#    than checking for definedness ourselves.
+# 2. We don't add an undefined count or a ".", even though its value is 0,
+#    because we don't want to turn an undef $dst->[$idx] into 0 unnecessarily.
+sub add_array_a_to_b ($$) 
+{
+    my ($src, $dst) = @_;
+    my $last = max(scalar @$src, scalar @$dst) - 1;
+    $^W = 0;
+    for my $idx (0 .. $last) {
+        my $count = $src->[$idx];
+        $dst->[$idx] += $count unless (!defined $count || "." eq $count);
+    }
+    $^W = 1;
+}
+
+# Split the counts on a line into a CC array (returned by reference).  '.'
+# entries are kept as "."; trailing missing entries are simply absent.
+sub line_to_CC ($)
+{
+    my @CC = (split /\s+/, $_[0]);
+    (@CC <= @events) or die("Line $.: too many event counts\n");
+    return \@CC;
+}
+
+sub read_input_file() 
+{
+    # Parse $input_file: header lines into $desc, $cmd and @events, then the
+    # body into %fn_totals, %all_ind_CCs and $summary_CC.
+    open(INPUTFILE, "<", $input_file)   # 3-arg form: name is never a mode
+         || die "Cannot open $input_file for reading\n";
+
+    # Read "desc:" lines; anchored so "desc:" inside the cmd line isn't eaten.
+    my $line;
+    while ($line = <INPUTFILE>) {
+        if ($line =~ s/^desc:\s+//) {
+            $desc .= $line;
+        } else {
+            last;
+        }
+    }
+
+    # Read "cmd:" line (Nb: will already be in $line from "desc:" loop above).
+    (defined $line && $line =~ s/^cmd:\s+//)
+        or die("Line $.: missing command line\n");
+    $cmd = $line;
+    chomp($cmd);    # Remove newline
+
+    # Read "events:" line.  We make a temporary hash in which the Nth event's
+    # value is N, which is useful for handling --show/--sort options below.
+    $line = <INPUTFILE>;
+    (defined $line && $line =~ s/^events:\s+//) 
+        or die("Line $.: missing events line\n");
+    @events = split(/\s+/, $line);
+    my %events;
+    my $n = 0;
+    foreach my $event (@events) {
+        $events{$event} = $n;
+        $n++
+    }
+
+    # If no --show arg given, default to showing all events in the file.
+    # If --show option is used, check all specified events appeared in the
+    # "events:" line.  Then initialise @show_order.
+    if (@show_events) {
+        foreach my $show_event (@show_events) {
+            (defined $events{$show_event}) or 
+                die("--show event `$show_event' did not appear in input\n");
+        }
+    } else {
+        @show_events = @events;
+    }
+    foreach my $show_event (@show_events) {
+        push(@show_order, $events{$show_event});
+    }
+
+    # Do as for --show, but if no --sort arg given, default to sorting by
+    # column order (ie. first column event is primary sort key, 2nd column is
+    # 2ndary key, etc).
+    if (@sort_events) {
+        foreach my $sort_event (@sort_events) {
+            (defined $events{$sort_event}) or 
+                die("--sort event `$sort_event' did not appear in input\n");
+        }
+    } else {
+        @sort_events = @events;
+    }
+    foreach my $sort_event (@sort_events) {
+        push(@sort_order, $events{$sort_event});
+    }
+
+    # If multiple threshold args weren't given via --sort, stick in the single
+    # threshold (either from --threshold if used, or the default otherwise) for
+    # the primary sort event, and 0% for the rest.
+    if (not @thresholds) {
+        foreach my $e (@sort_order) {
+            push(@thresholds, 0);
+        }
+        $thresholds[0] = $single_threshold;
+    }
+
+    my $curr_file;
+    my $curr_fn;
+    my $curr_name;
+    my $curr_fn_CC = [];
+    my $curr_file_ind_CCs = {};     # hash(line_num => CC)
+
+    # Read body of input file.
+    while (<INPUTFILE>) {
+        s/#.*$//;   # remove comments
+        if (s/^(\d+)\s+//) {
+            my $line_num = $1;
+            my $CC = line_to_CC($_);
+            add_array_a_to_b($CC, $curr_fn_CC);
+            
+            # If curr_file is selected, add CC to curr_file list.  We look for
+            # full filename matches;  or, if auto-annotating, we have to
+            # remember everything -- we won't know until the end what's needed.
+            if ($auto_annotate || defined $user_ann_files{$curr_file}) {
+                my $tmp = $curr_file_ind_CCs->{$line_num};
+                $tmp = [] unless defined $tmp;
+                add_array_a_to_b($CC, $tmp);
+                $curr_file_ind_CCs->{$line_num} = $tmp;
+            }
+
+        } elsif (s/^fn=(.*)$//) {
+            # Commit result from previous function
+            $fn_totals{$curr_name} = $curr_fn_CC if (defined $curr_name);
+
+            # Setup new one
+            $curr_fn = $1;
+            $curr_name = "$curr_file:$curr_fn";
+            $curr_fn_CC = $fn_totals{$curr_name};
+            $curr_fn_CC = [] unless (defined $curr_fn_CC);
+
+        } elsif (s/^fl=(.*)$//) {
+            $all_ind_CCs{$curr_file} = $curr_file_ind_CCs 
+                if (defined $curr_file);
+
+            $curr_file = $1;
+            $curr_file_ind_CCs = $all_ind_CCs{$curr_file};
+            $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs);
+
+        } elsif (s/^\s*$//) {
+            # blank, do nothing
+        } elsif (s/^summary:\s+//) {
+            # Finish up handling final filename/fn_name counts
+            $fn_totals{"$curr_file:$curr_fn"} = $curr_fn_CC 
+                if (defined $curr_file && defined $curr_fn);
+            $all_ind_CCs{$curr_file} = 
+                $curr_file_ind_CCs if (defined $curr_file);
+
+            $summary_CC = line_to_CC($_);
+            (scalar(@$summary_CC) == @events) 
+                or die("Line $.: summary event and total event mismatch\n");
+
+        } else {
+            warn("WARNING: line $. malformed, ignoring\n");
+        }
+    }
+
+    # Check if summary line was present
+    if (not defined $summary_CC) {
+        die("missing final summary line, aborting\n");
+    }
+
+    close(INPUTFILE);
+}
+
+#-----------------------------------------------------------------------------
+# Print options used
+#-----------------------------------------------------------------------------
+sub print_options ()
+{
+    print($fancy);
+    print($desc);
+    print("Command:          $cmd\n");
+    print("Data file:        $input_file\n");
+    print("Events recorded:  @events\n");
+    print("Events shown:     @show_events\n");
+    print("Event sort order: @sort_events\n");
+    print("Thresholds:       @thresholds\n");
+
+    # Every include dir except the leading "" entry; the first is printed
+    # beside the label (or "" if there are none), the rest aligned below it.
+    my @extra_dirs = @include_dirs[1 .. $#include_dirs];
+    @extra_dirs = ("") if (0 == @extra_dirs);
+    print("Include dirs:     $extra_dirs[0]\n");
+    foreach my $idx (1 .. $#extra_dirs) {
+        print("                  $extra_dirs[$idx]\n");
+    }
+
+    # Same layout for the source files named on the command line.
+    my @ann_files = keys %user_ann_files;
+    @ann_files = ("") if (0 == @ann_files);
+    print("User annotated:   $ann_files[0]\n");
+    foreach my $idx (1 .. $#ann_files) {
+        print("                  $ann_files[$idx]\n");
+    }
+
+    my $is_on = ($auto_annotate ? "on" : "off");
+    print("Auto-annotation:  $is_on\n");
+    print("\n");
+}
+
+#-----------------------------------------------------------------------------
+# Print summary and sorted function totals
+#-----------------------------------------------------------------------------
+sub mycmp ($$) 
+{
+    my ($left, $right) = @_;
+
+    # Compare on each sort event in turn (eg. 3,2); the first event whose
+    # counts differ decides.  A missing count is treated as -1, and the
+    # operands of <=> are swapped so that larger counts sort first.
+    foreach my $ev (@sort_order) {
+        my $l_count = $left->[$ev];
+        my $r_count = $right->[$ev];
+        $l_count = -1 if (not defined $l_count);
+        $r_count = -1 if (not defined $r_count);
+
+        my $order = $r_count <=> $l_count;
+        return $order if (0 != $order);
+    }
+
+    # All sort events compare equal.
+    return 0;
+}
+
+sub commify ($) {
+    my $text = shift;
+    while ($text =~ s/^(\d+)(\d{3})/$1,$2/) { }   # one comma per pass
+    return $text;
+}
+
+# Counts can get very big, so rather than wasting screen space on fixed
+# widths we size every column to its widest entry (and never narrower than
+# the event name).  Returns a reference to the array of widths.
+sub compute_CC_col_widths (@) 
+{
+    my @CCs = @_;
+
+    # Minimum width of each column: the length of its event name.  Positions
+    # follow @events.
+    my $CC_col_widths = [ map { length($_) } @events ];
+
+    # Widen each column to its longest count.  @$CC_col_widths positions
+    # correspond to @CC positions.
+    foreach my $CC (@CCs) {
+        foreach my $i (0 .. $#$CC) {
+            next if (not defined $CC->[$i]);
+
+            # Printed length: the raw length plus one comma for every three
+            # characters after the first.
+            my $digits = length $CC->[$i];
+            my $width  = $digits + int(($digits - 1) / 3);
+            $CC_col_widths->[$i] = max($CC_col_widths->[$i], $width);
+        }
+    }
+
+    return $CC_col_widths;
+}
+
+# Print one CC: the events selected by @show_order, each right-aligned to the
+# width given for it in $CC_col_widths and followed by a single space.  An
+# undefined count is shown as ".".
+sub print_CC ($$)
+{
+    my ($CC, $CC_col_widths) = @_;
+
+    for my $idx (@show_order) {
+        my $count;
+        if (defined $CC->[$idx]) {
+            $count = commify($CC->[$idx]);
+        } else {
+            $count = ".";
+        }
+        my $padding = ' ' x ($CC_col_widths->[$idx] - length($count));
+        print($padding . $count . " ");
+    }
+}
+
+# Print the header row: the name of each event in @show_order, right-aligned
+# to the matching width in $CC_col_widths and followed by a single space.
+sub print_events ($)
+{
+    my ($CC_col_widths) = @_;
+
+    for my $idx (@show_order) {
+        my $name    = $events[$idx];
+        my $padding = ' ' x ($CC_col_widths->[$idx] - length($name));
+        print($padding . $name . " ");
+    }
+}
+
+# Prints summary and function totals (with separate column widths, so that
+# function names aren't pushed over unnecessarily by huge summary figures).
+# Functions are printed in --sort order until, for every sort event, the
+# functions printed so far account for at least the matching percentage in
+# @thresholds of the program total held in $summary_CC.
+# Returns a reference to a hash whose keys are the files of the functions
+# that were printed (ie. all the interesting ones).
+sub print_summary_and_fn_totals ()
+{
+    my @fn_fullnames = keys   %fn_totals;
+
+    # Work out the size of each column for printing (summary and functions
+    # separately).
+    my $summary_CC_col_widths = compute_CC_col_widths($summary_CC);
+    my      $fn_CC_col_widths = compute_CC_col_widths(values %fn_totals);
+
+    # Header and counts for summary
+    print($fancy);
+    print_events($summary_CC_col_widths);
+    print("\n");
+    print($fancy);
+    print_CC($summary_CC, $summary_CC_col_widths);
+    print(" PROGRAM TOTALS\n");
+    print("\n");
+
+    # Header for functions
+    print($fancy);
+    print_events($fn_CC_col_widths);
+    print(" file:function\n");
+    print($fancy);
+
+    # Sort function names into order dictated by --sort option.
+    @fn_fullnames = sort {
+        mycmp($fn_totals{$a}, $fn_totals{$b})
+    } @fn_fullnames;
+
+
+    # Assertion
+    (scalar @sort_order == scalar @thresholds) or 
+        die("sort_order length != thresholds length:\n",
+            "  @sort_order\n  @thresholds\n");
+
+    my $threshold_files       = {};
+    # @curr_totals has the same shape as @sort_order and @thresholds
+    my @curr_totals = (0) x @thresholds;
+
+    # Print functions, stopping when the threshold has been reached.
+    foreach my $fn_name (@fn_fullnames) {
+
+        # Stop when we've reached all the thresholds
+        my $reached_all_thresholds = 1;
+        foreach my $i (0 .. scalar @thresholds - 1) {
+            # A sort event whose program total is zero (or missing) would
+            # make the division below die with "Illegal division by zero";
+            # count such an event as 0% covered instead.
+            my $total = $summary_CC->[$sort_order[$i]];
+            my $prop  = ($total ? $curr_totals[$i] * 100 / $total : 0);
+            $reached_all_thresholds &&= ($prop >= $thresholds[$i]);
+        }
+        last if $reached_all_thresholds;
+
+        # Print function results
+        my $fn_CC = $fn_totals{$fn_name};
+        print_CC($fn_CC, $fn_CC_col_widths);
+        print(" $fn_name\n");
+
+        # Update the threshold counts
+        my $filename = $fn_name;
+        $filename =~ s/:.+$//;    # remove function name
+        $threshold_files->{$filename} = 1;
+        foreach my $i (0 .. scalar @sort_order - 1) {
+            $curr_totals[$i] += $fn_CC->[$sort_order[$i]] 
+                if (defined $fn_CC->[$sort_order[$i]]);
+        }
+    }
+    print("\n");
+
+    return $threshold_files;
+}
+
+#-----------------------------------------------------------------------------
+# Annotate selected files
+#-----------------------------------------------------------------------------
+
+# Print a banner warning that source file $src_file is more recent than the
+# input file named by $input_file, so the annotations may not be correct.
+sub warning_on_src_more_recent_than_inputfile ($)
+{
+    my ($src_file) = @_;
+
+    print(<<END);
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Source file '$src_file' is more recent than input file '$input_file'.
+@ Annotations may not be correct.
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+END
+}
+
+# If there is information about lines not in the file, issue a warning
+# explaining possible causes.
+#   $src_more_recent_than_inputfile: true if the source file is newer than
+#       the input file; selects the "file has changed" explanation
+#   $src_file:         name of the source file being annotated
+#   $excess_line_nums: reference to the list of line numbers past the end of
+#       the file; accepted from the caller but not used in the message
+sub warning_on_nonexistent_lines ($$$)
+{
+    my ($src_more_recent_than_inputfile, $src_file, $excess_line_nums) = @_;
+    my $cause_and_solution;
+
+    if ($src_more_recent_than_inputfile) {
+        # The advice names the current way to run the tool; the former text
+        # quoted "--cachesim=yes", an option from old Valgrind releases.
+        $cause_and_solution = <<END
+@@ cause:    '$src_file' has changed since information was gathered.
+@@           If so, a warning will have already been issued about this.
+@@ solution: Recompile program and rerun under "valgrind --tool=cachegrind" to
+@@           gather new information.
+END
+    # Header files get their own explanation (the warning is still printed)
+    } elsif ($src_file =~ /\.h$/) {
+        $cause_and_solution = <<END
+@@ cause:    bug in the Valgrind's debug info reader that screws up with .h
+@@           files sometimes
+@@ solution: none, sorry
+END
+    } else {
+        $cause_and_solution = <<END
+@@ cause:    not sure, sorry
+END
+    }
+
+    my $warning = <<END
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@
+@@ Information recorded about lines past the end of '$src_file'.
+@@
+@@ Probable cause and solution:
+$cause_and_solution@@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+END
+;
+    print($warning);
+}
+
+# Annotate the source files chosen by the user and, when auto-annotation is
+# on, those named in $threshold_files (a hash ref keyed by file name).  Each
+# file is looked for under every entry of @include_dirs.  For a file that is
+# found, prints every line that has counts plus $context lines either side,
+# then any counts recorded for lines past the end of the file.  Finishes with
+# the list of auto-selected files that could not be found and, if anything
+# was annotated, the percentage of each event covered by the annotated lines.
+# Dies if a file named by the user cannot be opened.
+sub annotate_ann_files($)
+{
+    my ($threshold_files) = @_;
+
+    my %all_ann_files;
+    my @unfound_auto_annotate_files;
+    my $printed_totals_CC = [];
+
+    # If auto-annotating, add interesting files (but not "???")
+    if ($auto_annotate) {
+        delete $threshold_files->{"???"};
+        %all_ann_files = (%user_ann_files, %$threshold_files);
+    } else {
+        %all_ann_files = %user_ann_files;
+    }
+
+    # Track if we did any annotations.
+    my $did_annotations = 0;
+
+    LOOP:
+    foreach my $src_file (keys %all_ann_files) {
+
+        my $opened_file = "";
+        my $full_file_name = "";
+        # Nb: include_dirs already includes "", so it works in the case
+        # where the filename has the full path.
+        foreach my $include_dir (@include_dirs) {
+            my $try_name = $include_dir . $src_file;
+            # Three-argument open: the name is taken literally, so leading or
+            # trailing whitespace and mode characters in it are not
+            # interpreted by open().
+            if (open(INPUTFILE, "<", $try_name)) {
+                $opened_file    = $try_name;
+                $full_file_name = ($include_dir eq ""
+                                  ? $src_file
+                                  : "$include_dir + $src_file");
+                last;
+            }
+        }
+
+        if (not $opened_file) {
+            # Failed to open the file.  If chosen on the command line, die.
+            # If arose from auto-annotation, print a little message.
+            if (defined $user_ann_files{$src_file}) {
+                die("File $src_file not opened in any of: @include_dirs\n");
+
+            } else {
+                push(@unfound_auto_annotate_files, $src_file);
+            }
+
+        } else {
+            # File header (distinguish between user- and auto-selected files).
+            print("$fancy");
+            my $ann_type =
+                (defined $user_ann_files{$src_file} ? "User" : "Auto");
+            print("-- $ann_type-annotated source: $full_file_name\n");
+            print("$fancy");
+
+            # Get file's CCs
+            my $src_file_CCs = $all_ind_CCs{$src_file};
+            if (!defined $src_file_CCs) {
+                print("  No information has been collected for $src_file\n\n");
+                # Release the handle before moving on to the next file.
+                close(INPUTFILE);
+                next LOOP;
+            }
+
+            $did_annotations = 1;
+
+            # Numeric, not lexicographic sort!
+            my @line_nums = sort {$a <=> $b} keys %$src_file_CCs;
+
+            # If $src_file more recent than cachegrind.out, issue warning
+            my $src_more_recent_than_inputfile = 0;
+            if ((stat $opened_file)[9] > (stat $input_file)[9]) {
+                $src_more_recent_than_inputfile = 1;
+                warning_on_src_more_recent_than_inputfile($src_file);
+            }
+
+            # Work out the size of each column for printing
+            my $CC_col_widths = compute_CC_col_widths(values %$src_file_CCs);
+
+            # Events header
+            print_events($CC_col_widths);
+            print("\n\n");
+
+            # Shift out 0 if it's in the line numbers (from unknown entries,
+            # likely due to bugs in Valgrind's stabs debug info reader)
+            shift(@line_nums) if (0 == $line_nums[0]);
+
+            # Finds interesting line ranges -- all lines with a CC, and all
+            # lines within $context lines of a line with a CC.
+            my $n = @line_nums;
+            my @pairs;
+            for (my $i = 0; $i < $n; $i++) {
+                push(@pairs, $line_nums[$i] - $context);   # lower marker
+                while ($i < $n-1 &&
+                       $line_nums[$i] + 2*$context >= $line_nums[$i+1]) {
+                    $i++;
+                }
+                push(@pairs, $line_nums[$i] + $context);   # upper marker
+            }
+
+            # Annotate chosen lines, tracking total counts of lines printed
+            $pairs[0] = 1 if ($pairs[0] < 1);
+            while (@pairs) {
+                my $low  = shift @pairs;
+                my $high = shift @pairs;
+                while ($. < $low-1) {
+                    my $tmp = <INPUTFILE>;
+                    last unless (defined $tmp);     # hack to detect EOF
+                }
+                my $src_line;
+                # Print line number, unless start of file
+                print("-- line $low " . '-' x 40 . "\n") if ($low != 1);
+                while (($. < $high) && ($src_line = <INPUTFILE>)) {
+                    if (defined $line_nums[0] && $. == $line_nums[0]) {
+                        print_CC($src_file_CCs->{$.}, $CC_col_widths);
+                        add_array_a_to_b($src_file_CCs->{$.},
+                                         $printed_totals_CC);
+                        shift(@line_nums);
+
+                    } else {
+                        print_CC( [], $CC_col_widths);
+                    }
+
+                    print(" $src_line");
+                }
+                # Print line number, unless EOF
+                if ($src_line) {
+                    print("-- line $high " . '-' x 40 . "\n");
+                } else {
+                    last;
+                }
+            }
+
+            # If there was info on lines past the end of the file...
+            if (@line_nums) {
+                foreach my $line_num (@line_nums) {
+                    print_CC($src_file_CCs->{$line_num}, $CC_col_widths);
+                    print(" <bogus line $line_num>\n");
+                }
+                print("\n");
+                warning_on_nonexistent_lines($src_more_recent_than_inputfile,
+                                             $src_file, \@line_nums);
+            }
+            print("\n");
+
+            # Print summary of counts attributed to file but not to any
+            # particular line (due to incomplete debug info).
+            if ($src_file_CCs->{0}) {
+                print_CC($src_file_CCs->{0}, $CC_col_widths);
+                print(" <counts for unidentified lines in $src_file>\n\n");
+            }
+
+            close(INPUTFILE);
+        }
+    }
+
+    # Print list of unfound auto-annotate selected files.
+    if (@unfound_auto_annotate_files) {
+        print("$fancy");
+        print("The following files chosen for auto-annotation could not be found:\n");
+        print($fancy);
+        foreach my $f (@unfound_auto_annotate_files) {
+            print("  $f\n");
+        }
+        print("\n");
+    }
+
+    # If we did any annotating, print what proportion of events were covered by
+    # annotated lines above.
+    if ($did_annotations) {
+        my $percent_printed_CC = [];
+        foreach my $i (0 .. $#$summary_CC) {
+            # An event with a zero (or missing) program total would make the
+            # division die with "Illegal division by zero"; report 0% for
+            # it.  An event with no annotated counts is also 0.
+            my $total   = $summary_CC->[$i];
+            my $printed = $printed_totals_CC->[$i];
+            $printed = 0 unless (defined $printed);
+            $percent_printed_CC->[$i] =
+                sprintf("%.0f", ($total ? $printed / $total * 100 : 0));
+        }
+        my $pp_CC_col_widths = compute_CC_col_widths($percent_printed_CC);
+        print($fancy);
+        print_events($pp_CC_col_widths);
+        print("\n");
+        print($fancy);
+        print_CC($percent_printed_CC, $pp_CC_col_widths);
+        print(" percentage of events annotated\n\n");
+    }
+}
+
+#----------------------------------------------------------------------------
+# "main()"
+#----------------------------------------------------------------------------
+# Top-level driver: the steps run in this order.  The hash reference returned
+# by print_summary_and_fn_totals() is handed on to annotate_ann_files().
+process_cmd_line();
+read_input_file();
+print_options();
+my $threshold_files = print_summary_and_fn_totals();
+annotate_ann_files($threshold_files);
+
+##--------------------------------------------------------------------##
+##--- end                                           cg_annotate.in ---##
+##--------------------------------------------------------------------##
+
+
diff --git a/cachegrind/.svn/text-base/cg_arch.h.svn-base b/cachegrind/.svn/text-base/cg_arch.h.svn-base
new file mode 100644
index 0000000..9090908
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg_arch.h.svn-base
@@ -0,0 +1,50 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Arch-specific declarations.                        cg_arch.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __CG_ARCH_H
+#define __CG_ARCH_H
+
+// For cache simulation
+// Describes one cache by its size, associativity and line size.
+typedef struct {
+   int size;       // bytes
+   int assoc;      // associativity (presumably the number of ways -- confirm)
+   int line_size;  // bytes
+} cache_t;
+
+// Gives the configuration of I1, D1 and L2 caches.  They get overridden
+// by any cache configurations specified on the command line.
+//   I1c, D1c, L2c:          the descriptions of the three caches
+//   all_caches_clo_defined: presumably True when all three caches were given
+//                           on the command line -- confirm in the
+//                           arch-specific implementations
+void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
+                           Bool all_caches_clo_defined);
+
+#endif   // __CG_ARCH_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/.svn/text-base/cg_branchpred.c.svn-base b/cachegrind/.svn/text-base/cg_branchpred.c.svn-base
new file mode 100644
index 0000000..e19a3d3
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg_branchpred.c.svn-base
@@ -0,0 +1,154 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Branch predictor simulation                  cg_branchpred.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+
+/* This file contains the actual branch predictor simulator and its
+   associated state.  As with cg_sim.c it is #included directly into
+   cg_main.c.  It provides:
+
+   - a taken/not-taken predictor for conditional branches
+   - a branch target address predictor for indirect branches
+
+   Function return-address prediction is not modelled, on the basis
+   that return stack predictors almost always predict correctly, and
+   also that it is difficult for Valgrind to robustly identify
+   function calls and returns.
+*/
+
+/* How many bits at the bottom of an instruction address are
+   guaranteed to be zero? */
+#if defined(VGA_ppc32) || defined(VGA_ppc64)
+#  define N_IADDR_LO_ZERO_BITS 2
+#elif defined(VGA_x86) || defined(VGA_amd64)
+#  define N_IADDR_LO_ZERO_BITS 0
+#else
+#  error "Unsupported architecture"
+#endif
+
+
+/* Get a taken/not-taken prediction for the instruction (presumably a
+   conditional branch) at instr_addr.  Once that's done, update the
+   predictor state based on whether or not it was actually taken, as
+   indicated by 'taken'.  Finally, return 1 for a mispredict and 0 for
+   a successful predict.
+
+   The predictor is an array of 16k (== 2^14) 2-bit saturating
+   counters.  Given the address of the branch instruction, the array
+   index to use is computed both from the low order bits of the branch
+   instruction's address, and the global history - that is, from the
+   taken/not-taken behaviour of the most recent few branches.  This
+   makes the predictor able to correlate this branch's behaviour with
+   that of other branches. 
+
+   TODO: use predictor written by someone who understands this stuff.
+   Perhaps it would be better to move to a standard GShare predictor
+   and/or tournament predictor.
+*/
+/* The index is composed of N_HIST_BITS bits at the top and N_IADD_BITS
+   bits at the bottom.  These numbers were chosen somewhat arbitrarily,
+   but note that making N_IADD_BITS too small (eg 4) can cause large
+   amounts of aliasing, and hence misprediction, particularly if the
+   history bits are mostly unchanging. */
+#define N_HIST_BITS 7
+#define N_IADD_BITS 7
+
+/* Total index width (7 + 7 == 14 bits) and the resulting number of
+   counters (1 << 14 == 16384). */
+#define N_BITS     (N_HIST_BITS + N_IADD_BITS)
+#define N_COUNTERS (1 << N_BITS)
+
+static UWord shift_register = 0;   /* Contains global history */
+static UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */
+
+
+/* Predict, then train on, the conditional branch at instr_addr.  takenW
+   says whether the branch was actually taken (greater than zero means
+   taken).  Returns 1 for a mispredict and 0 for a correct prediction. */
+static ULong do_cond_branch_predict ( Addr instr_addr, Word takenW )
+{
+   const UWord hist_mask = (1 << N_HIST_BITS) - 1;
+   const UWord iadd_mask = (1 << N_IADD_BITS) - 1;
+   UWord hist_bits, iadd_bits, indx;
+   Bool  was_taken, guessed_taken;
+
+   hist_bits = shift_register & hist_mask;
+   iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS) & iadd_mask;
+   tl_assert(hist_bits <= hist_mask);
+   tl_assert(iadd_bits <= iadd_mask);
+
+   /* History bits form the top of the index, address bits the bottom. */
+   indx = (hist_bits << N_IADD_BITS) | iadd_bits;
+   tl_assert(indx < N_COUNTERS);
+   if (0) VG_(printf)("index = %d\n", (Int)indx);
+
+   tl_assert(takenW <= 1);
+   guessed_taken = counters[ indx ] >= 2;
+   was_taken     = takenW > 0;
+
+   /* Shift this outcome into the global history. */
+   shift_register = (shift_register << 1) | (was_taken ? 1 : 0);
+
+   /* Saturating update: the counter stays within 0 .. 3. */
+   if (was_taken) {
+      if (counters[indx] < 3)
+         counters[indx]++;
+   } else {
+      if (counters[indx] > 0)
+         counters[indx]--;
+   }
+
+   tl_assert(counters[indx] <= 3);
+
+   /* Both flags are 0 or 1, so they differ exactly on a mispredict. */
+   return (was_taken != guessed_taken) ? 1 : 0;
+}
+
+
+/* A very simple indirect branch predictor.  Use the branch's address
+   to index a table which records the previous target address for this
+   branch (or whatever aliased with it) and use that as the
+   prediction.  The table has 1 << N_BTAC_BITS (== 512) entries. */
+#define N_BTAC_BITS 9
+#define N_BTAC      (1 << N_BTAC_BITS)
+static Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */
+
+/* Predict the target of the indirect branch at instr_addr as whatever is
+   currently recorded in its btac slot, then record 'actual' there.
+   Returns 1 if the recorded target differed from 'actual' (a mispredict)
+   and 0 otherwise. */
+static ULong do_ind_branch_predict ( Addr instr_addr, Addr actual )
+{
+   const UWord mask = (1 << N_BTAC_BITS) - 1;
+   UWord indx;
+   Addr  predicted;
+
+   indx = (instr_addr >> N_IADDR_LO_ZERO_BITS) & mask;
+   tl_assert(indx < N_BTAC);
+
+   predicted  = btac[indx];
+   btac[indx] = actual;
+   return (predicted != actual) ? 1 : 0;
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                          cg_branchpred.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/cachegrind/.svn/text-base/cg_main.c.svn-base b/cachegrind/.svn/text-base/cg_main.c.svn-base
new file mode 100644
index 0000000..6d7ce87
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg_main.c.svn-base
@@ -0,0 +1,1767 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Cachegrind: everything but the simulation itself.            ---*/
+/*---                                                    cg_main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_vki.h"
+#include "pub_tool_debuginfo.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcfile.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_libcproc.h"
+#include "pub_tool_machine.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_options.h"
+#include "pub_tool_oset.h"
+#include "pub_tool_tooliface.h"
+#include "pub_tool_xarray.h"
+#include "pub_tool_clientstate.h"
+#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
+
+#include "cg_arch.h"
+#include "cg_sim.c"
+#include "cg_branchpred.c"
+
+/*------------------------------------------------------------*/
+/*--- Constants                                            ---*/
+/*------------------------------------------------------------*/
+
+/* Set to 1 for very verbose debugging */
+#define DEBUG_CG 0
+
+/* NOTE(review): presumably the smallest cache line size accepted, in
+   bytes -- confirm against the code that uses it. */
+#define MIN_LINE_SIZE         16
+/* Sizes of the buffers that hold file names and function names (see
+   get_debug_info and get_lineCC). */
+#define FILE_LEN              VKI_PATH_MAX
+#define FN_LEN                256
+
+/*------------------------------------------------------------*/
+/*--- Options                                              ---*/
+/*------------------------------------------------------------*/
+
+static Bool  clo_cache_sim  = True;  /* do cache simulation? */
+static Bool  clo_branch_sim = False; /* do branch simulation? */
+/* Output file name.  NOTE(review): "%p" is presumably replaced by the
+   process ID when the name is expanded -- confirm where the file is
+   opened. */
+static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
+
+/*------------------------------------------------------------*/
+/*--- Types and Data Structures                            ---*/
+/*------------------------------------------------------------*/
+
+/* Counters for one kind of memory access; LineCC holds one of these
+   each for instruction reads, data reads and data writes. */
+typedef
+   struct {
+      ULong a;  /* total # memory accesses of this kind */
+      ULong m1; /* misses in the first level cache */
+      ULong m2; /* misses in the second level cache */
+   }
+   CacheCC;
+
+/* Counters for one kind of branch; LineCC holds one of these each for
+   conditional and indirect branches. */
+typedef
+   struct {
+      ULong b;  /* total # branches of this kind */
+      ULong mp; /* number of branches mispredicted */
+   }
+   BranchCC;
+
+//------------------------------------------------------------
+// Primary data structure #1: CC table
+// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
+// - an ordered set of CCs.  CC indexing done by file/function/line (as
+//   determined from the instrAddr).
+// - Traversed for dumping stats at end in file/func/line hierarchy.
+
+// A source location: the key by which LineCC nodes are looked up.
+typedef struct {
+   Char* file;   // file name; "???" when no file/line info was found
+   Char* fn;     // function name; "???" when no function name was found
+   Int   line;   // line number; 0 when no file/line info was found
+}
+CodeLoc;
+
+// All the counts kept for one source location.  Nodes are created, with
+// every count zeroed, by get_lineCC and stored in CC_table.
+typedef struct {
+   CodeLoc  loc; /* Source location that these counts pertain to */
+   CacheCC  Ir;  /* Insn read counts */
+   CacheCC  Dr;  /* Data read counts */
+   CacheCC  Dw;  /* Data write/modify counts */
+   BranchCC Bc;  /* Conditional branch counts */
+   BranchCC Bi;  /* Indirect branch counts */
+} LineCC;
+
+// Orders a CodeLoc key (vloc) against the location stored in a LineCC
+// node (vcc): file names are compared first, then function names, then
+// line numbers.  Returns zero when all three match; otherwise the sign of
+// the result gives the ordering.  (Presumably the comparison function
+// registered for CC_table -- confirm where the table is created.)
+static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
+{
+   CodeLoc* key  = (CodeLoc*)vloc;
+   CodeLoc* elem = &(((LineCC*)vcc)->loc);
+   Word     res;
+
+   res = VG_(strcmp)(key->file, elem->file);
+   if (0 == res)
+      res = VG_(strcmp)(key->fn, elem->fn);
+   if (0 == res)
+      res = key->line - elem->line;
+
+   return res;
+}
+
+static OSet* CC_table;
+
+//------------------------------------------------------------
+// Primary data structure #2: InstrInfo table
+// - Holds the cached info about each instr that is used for simulation.
+// - table(SB_start_addr, list(InstrInfo))
+// - For each SB, each InstrInfo in the list holds info about the
+//   instruction (instrLen, instrAddr, etc), plus a pointer to its line
+//   CC.  This node is what's passed to the simulation function.
+// - When SBs are discarded the relevant list(instr_details) is freed.
+
+// Cached per-instruction info; a pointer to one of these is what the
+// log_* simulation functions receive.
+typedef struct _InstrInfo InstrInfo;
+struct _InstrInfo {
+   Addr    instr_addr;     // address of the instruction
+   UChar   instr_len;      // length of the instruction
+   LineCC* parent;         // parent line-CC
+};
+
+// Per-SB record, keyed by the SB's start address.
+typedef struct _SB_info SB_info;
+struct _SB_info {
+   Addr      SB_addr;      // key;  MUST BE FIRST
+   Int       n_instrs;     // presumably the number of entries in instrs[]
+   InstrInfo instrs[0];    // zero-length trailing array; NOTE(review): the
+                           // entries are presumably allocated directly after
+                           // the struct -- confirm at the allocation site
+};
+
+static OSet* instrInfoTable;
+
+//------------------------------------------------------------
+// Secondary data structure: string table
+// - holds strings, avoiding dups
+// - used for filenames and function names, each of which will be
+//   pointed to by one or more CCs.
+// - it also allows equality checks just by pointer comparison, which
+//   is good when printing the output file at the end.
+
+static OSet* stringTable;
+
+//------------------------------------------------------------
+// Stats
+// NOTE(review): the distinct_* counters are not updated in the code visible
+// here; their names suggest counts of unique files/fns/lines/instrs.
+static Int  distinct_files      = 0;
+static Int  distinct_fns        = 0;
+static Int  distinct_lines      = 0;
+static Int  distinct_instrs     = 0;
+
+// Incremented by get_debug_info, one per call, according to what was found:
+// file/line and fn name, file/line only, fn name only, or neither.
+static Int  full_debugs         = 0;
+static Int  file_line_debugs    = 0;
+static Int  fn_debugs           = 0;
+static Int  no_debugs           = 0;
+
+/*------------------------------------------------------------*/
+/*--- String table operations                              ---*/
+/*------------------------------------------------------------*/
+
+// Comparison function for string-table entries.  Both 'key' and 'elem'
+// point at a Char*; the two strings they refer to are compared with
+// VG_(strcmp).
+static Word stringCmp( const void* key, const void* elem )
+{
+   Char* key_str  = *(Char**)key;
+   Char* elem_str = *(Char**)elem;
+
+   return VG_(strcmp)(key_str, elem_str);
+}
+
+// Return the permanent copy of string 's'.  If the string table already
+// holds an equal string, that one is returned; otherwise 's' is duplicated,
+// the duplicate is inserted into the table, and the duplicate is returned.
+static Char* get_perm_string(Char* s)
+{
+   Char** node = VG_(OSetGen_Lookup)(stringTable, &s);
+
+   if (!node) {
+      // Not seen before: store a private copy in the table.
+      node  = VG_(OSetGen_AllocNode)(stringTable, sizeof(Char*));
+      *node = VG_(strdup)("cg.main.gps.1", s);
+      VG_(OSetGen_Insert)(stringTable, node);
+   }
+   return *node;
+}
+
+/*------------------------------------------------------------*/
+/*--- CC table operations                                  ---*/
+/*------------------------------------------------------------*/
+
+// Look up the source location of the instruction at instr_addr.
+//   file: receives the file name, prefixed with "<dir>/" when a directory
+//         name was found; "???" if no file/line info was found
+//   fn:   receives the function name; "???" if none was found
+//   line: receives the line number; 0 if no file/line info was found
+// Also bumps one of the full/file_line/fn/no _debugs counters.
+static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
+                           Char fn[FN_LEN], Int* line)
+{
+   Char dir[FILE_LEN];
+   Bool found_dirname;
+   Bool found_file_line;
+   Bool found_fn;
+
+   found_file_line = VG_(get_filename_linenum)(instr_addr,
+                                               file, FILE_LEN,
+                                               dir,  FILE_LEN, &found_dirname,
+                                               line);
+   found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
+
+   // Fill in placeholders for whatever could not be found.
+   if (!found_file_line) {
+      VG_(strcpy)(file, "???");
+      *line = 0;
+   }
+   if (!found_fn) {
+      VG_(strcpy)(fn,  "???");
+   }
+
+   if (found_dirname) {
+      // Build "dir/file" in dir, then copy it back into file.
+      // +1 for the '/'.
+      tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN);
+      VG_(strcat)(dir, "/");
+      VG_(strcat)(dir, file);
+      VG_(strcpy)(file, dir);
+   }
+
+   // Record which combination of debug info was available.
+   if      ( found_file_line &&  found_fn) full_debugs++;
+   else if ( found_file_line && !found_fn) file_line_debugs++;
+   else if (!found_file_line &&  found_fn) fn_debugs++;
+   else                                    no_debugs++;
+}
+
+// Return the line CC for the source location of the instruction at
+// origAddr.  The location (file, fn, line) comes from get_debug_info and is
+// looked up in CC_table; if no node exists yet, a new one with all counts
+// zeroed is created, inserted and returned.
+static LineCC* get_lineCC(Addr origAddr)
+{
+   Char    file[FILE_LEN], fn[FN_LEN];
+   Int     line;
+   CodeLoc loc;
+   LineCC* lineCC;
+
+   get_debug_info(origAddr, file, fn, &line);
+
+   loc.file = file;
+   loc.fn   = fn;
+   loc.line = line;
+
+   lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
+   if (lineCC)
+      return lineCC;
+
+   // Not seen before: allocate a node, give it permanent copies of the
+   // strings (file and fn above are stack buffers), and zero every count.
+   lineCC           = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
+   lineCC->loc.file = get_perm_string(loc.file);
+   lineCC->loc.fn   = get_perm_string(loc.fn);
+   lineCC->loc.line = loc.line;
+
+   lineCC->Ir.a = lineCC->Ir.m1 = lineCC->Ir.m2 = 0;
+   lineCC->Dr.a = lineCC->Dr.m1 = lineCC->Dr.m2 = 0;
+   lineCC->Dw.a = lineCC->Dw.m1 = lineCC->Dw.m2 = 0;
+   lineCC->Bc.b = lineCC->Bc.mp = 0;
+   lineCC->Bi.b = lineCC->Bi.mp = 0;
+
+   VG_(OSetGen_Insert)(CC_table, lineCC);
+   return lineCC;
+}
+
+/*------------------------------------------------------------*/
+/*--- Cache simulation functions                           ---*/
+/*------------------------------------------------------------*/
+
+// Log one instruction fetch with no data access: hands the instruction's
+// address and length, plus the parent line's Ir.m1 and Ir.m2 counters, to
+// cachesim_I1_doref, then counts the access in the parent's Ir.a.
+static VG_REGPARM(1)
+void log_1I_0D_cache_access(InstrInfo* n)
+{
+   LineCC* cc = n->parent;
+
+   //VG_(printf)("1I_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
+   //             n, n->instr_addr, n->instr_len);
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc->Ir.m1, &cc->Ir.m2);
+   cc->Ir.a++;
+}
+
+static VG_REGPARM(2)
+void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2)
+{
+   /* Two instruction fetches, no data access.  Each instruction goes
+      through the I1 simulation and is counted on its own line CC: n
+      first, then n2. */
+   LineCC* cc1 = n->parent;
+   LineCC* cc2 = n2->parent;
+
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc1->Ir.m1, &cc1->Ir.m2);
+   cc1->Ir.a += 1;
+   cachesim_I1_doref(n2->instr_addr, n2->instr_len, &cc2->Ir.m1, &cc2->Ir.m2);
+   cc2->Ir.a += 1;
+}
+
+static VG_REGPARM(3)
+void log_3I_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
+{
+   /* Three instruction fetches, no data access.  Equivalent to three
+      single-fetch events in a row: each instruction goes through the I1
+      simulation and is counted on its own line CC (n, then n2, then n3). */
+   LineCC* cc1 = n->parent;
+   LineCC* cc2 = n2->parent;
+   LineCC* cc3 = n3->parent;
+
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc1->Ir.m1, &cc1->Ir.m2);
+   cc1->Ir.a += 1;
+
+   cachesim_I1_doref(n2->instr_addr, n2->instr_len, &cc2->Ir.m1, &cc2->Ir.m2);
+   cc2->Ir.a += 1;
+
+   cachesim_I1_doref(n3->instr_addr, n3->instr_len, &cc3->Ir.m1, &cc3->Ir.m2);
+   cc3->Ir.a += 1;
+}
+
+static VG_REGPARM(3)
+void log_1I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
+{
+   /* One instruction fetch plus one data access logged as a read.  The
+      fetch goes through the I1 simulation, the data access through the D1
+      simulation; both are counted on the instruction's line CC. */
+   LineCC* cc = n->parent;
+
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc->Ir.m1, &cc->Ir.m2);
+   cc->Ir.a += 1;
+
+   cachesim_D1_doref(data_addr, data_size, &cc->Dr.m1, &cc->Dr.m2);
+   cc->Dr.a += 1;
+}
+
+static VG_REGPARM(3)
+void log_1I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
+{
+   /* One instruction fetch plus one data write.  The fetch goes through
+      the I1 simulation, the write through the D1 simulation; both are
+      counted on the instruction's line CC. */
+   LineCC* cc = n->parent;
+
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc->Ir.m1, &cc->Ir.m2);
+   cc->Ir.a += 1;
+
+   cachesim_D1_doref(data_addr, data_size, &cc->Dw.m1, &cc->Dw.m2);
+   cc->Dw.a += 1;
+}
+
+static VG_REGPARM(3)
+void log_0I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
+{
+   /* A data access logged as a read, with no instruction fetch: only the
+      Dr counters of the instruction's line CC are updated. */
+   LineCC* cc = n->parent;
+   cachesim_D1_doref(data_addr, data_size, &cc->Dr.m1, &cc->Dr.m2);
+   cc->Dr.a += 1;
+}
+
+static VG_REGPARM(3)
+void log_0I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
+{
+   /* A data write with no instruction fetch: only the Dw counters of the
+      instruction's line CC are updated. */
+   LineCC* cc = n->parent;
+   cachesim_D1_doref(data_addr, data_size, &cc->Dw.m1, &cc->Dw.m2);
+   cc->Dw.a += 1;
+}
+
+/* For branches, we consult two different predictors, one which
+   predicts taken/untaken for conditional branches, and the other
+   which predicts the branch target address for indirect branches
+   (jump-to-register style ones). */
+
+static VG_REGPARM(2)
+void log_cond_branch(InstrInfo* n, Word taken)
+{
+   /* Count one conditional branch on the line CC; bit 0 of the
+      predictor's result is added to the mispredict counter. */
+   LineCC* cc = n->parent;
+   cc->Bc.b  += 1;
+   cc->Bc.mp += do_cond_branch_predict(n->instr_addr, taken) & 1;
+}
+
+static VG_REGPARM(2)
+void log_ind_branch(InstrInfo* n, UWord actual_dst)
+{
+   /* Count one indirect branch on the line CC; bit 0 of the predictor's
+      result is added to the mispredict counter. */
+   LineCC* cc = n->parent;
+   cc->Bi.b  += 1;
+   cc->Bi.mp += do_ind_branch_predict(n->instr_addr, actual_dst) & 1;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Instrumentation types and structures                 ---*/
+/*------------------------------------------------------------*/
+
+/* Maintain an ordered list of memory events which are outstanding, in
+   the sense that no IR has yet been generated to do the relevant
+   helper calls.  The BB is scanned top to bottom and memory events
+   are added to the end of the list, merging with the most recent
+   notified event where possible (Dw immediately following Dr and
+   having the same size and EA can be merged).
+
+   This merging is done so that for architectures which have
+   load-op-store instructions (x86, amd64), the insn is treated as if
+   it makes just one memory reference (a modify), rather than two (a
+   read followed by a write at the same address).
+
+   At various points the list will need to be flushed, that is, IR
+   generated from it.  That must happen before any possible exit from
+   the block (the end, or an IRStmt_Exit).  Flushing also takes place
+   when there is no space to add a new event.
+
+   If we require the simulation statistics to be up to date with
+   respect to possible memory exceptions, then the list would have to
+   be flushed before each memory reference.  That would however lose
+   performance by inhibiting event-merging during flushing.
+
+   Flushing the list consists of walking it start to end and emitting
+   instrumentation IR for each event, in the order in which they
+   appear.  It may be possible to emit a single call for two adjacent
+   events in order to reduce the number of helper function calls made.
+   For example, it could well be profitable to handle two adjacent Ir
+   events with a single helper call.  */
+
+/* An IRExpr which is expected to be atomic; the addEvent_* functions
+   assert isIRAtom() on values of this type. */
+typedef IRExpr IRAtom;
+
+/* Kinds of event which can sit in the outstanding-event list. */
+typedef enum {
+   Ev_Ir,  /* instruction read */
+   Ev_Dr,  /* data read */
+   Ev_Dw,  /* data write */
+   Ev_Dm,  /* data modify: a read then a write of the same location */
+   Ev_Bc,  /* conditional branch */
+   Ev_Bi   /* indirect branch, i.e. to an unknown destination */
+}
+EventTag;
+
+/* An event awaiting instrumentation.  'tag' says which member of 'Ev'
+   is in use, and 'inode' is the InstrInfo of the instruction the event
+   belongs to. */
+typedef struct {
+   EventTag   tag;
+   InstrInfo* inode;
+   union {
+      struct { } Ir;                /* no payload */
+      struct {
+         IRAtom* ea;               /* address expression */
+         Int     szB;              /* access size in bytes */
+      } Dr;
+      struct {
+         IRAtom* ea;
+         Int     szB;
+      } Dw;
+      struct {
+         IRAtom* ea;
+         Int     szB;
+      } Dm;
+      struct {
+         IRAtom* taken;            /* guard, widened to a host word */
+      } Bc;
+      struct {
+         IRAtom* dst;              /* branch target expression */
+      } Bi;
+   } Ev;
+} Event;
+
+static void init_Event ( Event* ev ) {
+   VG_(memset)(ev, 0, sizeof *ev);   /* every byte zero; callers set tag */
+}
+
+/* Address expression carried by a data event (Dr, Dw or Dm).  Asserts if
+   handed any other kind of event. */
+static IRAtom* get_Event_dea ( Event* ev ) {
+   if (ev->tag == Ev_Dr) return ev->Ev.Dr.ea;
+   if (ev->tag == Ev_Dw) return ev->Ev.Dw.ea;
+   if (ev->tag == Ev_Dm) return ev->Ev.Dm.ea;
+   tl_assert(0);
+}
+
+/* Access size carried by a data event (Dr, Dw or Dm).  Asserts if handed
+   any other kind of event. */
+static Int get_Event_dszB ( Event* ev ) {
+   if (ev->tag == Ev_Dr) return ev->Ev.Dr.szB;
+   if (ev->tag == Ev_Dw) return ev->Ev.Dw.szB;
+   if (ev->tag == Ev_Dm) return ev->Ev.Dm.szB;
+   tl_assert(0);
+}
+
+
+/* Up to this many unnotified events are allowed.  Number is
+   arbitrary.  Larger numbers allow more event merging to occur, but
+   potentially induce more spilling due to extending live ranges of
+   address temporaries.  A full list is flushed before the next add. */
+#define N_EVENTS 16
+
+
+/* Running state for the instrumentation of one superblock, bundled up
+   so that it can be handed around as a single pointer. */
+typedef struct {
+   /* Outstanding events not yet turned into IR: events[0] up to
+      events[events_used-1], oldest first. */
+   Event events[N_EVENTS];
+   Int   events_used;
+
+   /* Per-SB record whose instrs[] array supplies one InstrInfo for
+      each instruction of the block. */
+   SB_info* sbInfo;
+
+   /* Index of the next unused entry in sbInfo->instrs[]. */
+   Int sbInfo_i;
+
+   /* The instrumented superblock being built. */
+   IRSB* sbOut;
+} CgState;
+
+
+/*------------------------------------------------------------*/
+/*--- Instrumentation main                                 ---*/
+/*------------------------------------------------------------*/
+
+// Create and register the SB_info for a superblock that is about to be
+// instrumented.  Note that origAddr is the real origAddr, not the address
+// of the first instruction in the block (they can be different due to
+// redirection).
+static
+SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
+{
+   Int      k;
+   Int      nIMarks = 0;
+   SB_info* info;
+
+   // Original instrs are counted by counting the IMark statements.
+   for (k = 0; k < sbIn->stmts_used; k++)
+      if (sbIn->stmts[k]->tag == Ist_IMark)
+         nIMarks++;
+
+   // This address must not already be in the instr-info table.  If it
+   // is, translations have been discarded without Cachegrind discarding
+   // the matching table entries.
+   info = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
+   tl_assert(NULL == info);
+
+   // BB never translated before (at this address, at least;  could have
+   // been unloaded and then reloaded elsewhere in memory).  The node has
+   // room for the header plus one InstrInfo per instruction.
+   info = VG_(OSetGen_AllocNode)(instrInfoTable,
+                        sizeof(SB_info) + nIMarks * sizeof(InstrInfo));
+   info->SB_addr  = origAddr;
+   info->n_instrs = nIMarks;
+   VG_(OSetGen_Insert)( instrInfoTable, info );
+   distinct_instrs++;
+
+   return info;
+}
+
+
+static void showEvent ( Event* ev )
+{
+   /* Debug aid.  Prints the event's kind and inode, then (for every kind
+      but Ir) the IR expression it carries, then a newline. */
+   IRExpr* payload = NULL;
+   switch (ev->tag) {
+      case Ev_Ir:
+         VG_(printf)("Ir %p\n", ev->inode);
+         return;
+      case Ev_Dr:
+         VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
+         payload = ev->Ev.Dr.ea;
+         break;
+      case Ev_Dw:
+         VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
+         payload = ev->Ev.Dw.ea;
+         break;
+      case Ev_Dm:
+         VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
+         payload = ev->Ev.Dm.ea;
+         break;
+      case Ev_Bc:
+         VG_(printf)("Bc %p   GA=", ev->inode);
+         payload = ev->Ev.Bc.taken;
+         break;
+      case Ev_Bi:
+         VG_(printf)("Bi %p  DST=", ev->inode);
+         payload = ev->Ev.Bi.dst;
+         break;
+      default:
+         tl_assert(0);
+         return;
+   }
+   ppIRExpr(payload);
+   VG_(printf)("\n");
+}
+
+// Claim the next unused InstrInfo of the SB for a new insn and fill it in.
+static
+InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
+{
+   Int        slot = cgs->sbInfo_i;
+   InstrInfo* inode;
+   tl_assert(slot >= 0 && slot < cgs->sbInfo->n_instrs);
+   inode             = &cgs->sbInfo->instrs[slot];
+   inode->instr_addr = instr_addr;
+   inode->instr_len  = instr_len;
+   inode->parent     = get_lineCC(instr_addr);
+   cgs->sbInfo_i     = slot + 1;
+   return inode;
+}
+
+
+/* Generate code for all outstanding events, and mark the queue
+   empty.  Code is generated into cgs->sbOut; each helper call is
+   handed the InstrInfo pointer(s) recorded in the events it covers. */
+
+static void flushEvents ( CgState* cgs )
+{
+   Int        i, regparms;
+   Char*      helperName;
+   void*      helperAddr;
+   IRExpr**   argv;
+   IRExpr*    i_node_expr;
+   IRDirty*   di;
+   Event*     ev;
+   Event*     ev2;
+   Event*     ev3;
+
+   i = 0;
+   while (i < cgs->events_used) {
+
+      helperName = NULL;
+      helperAddr = NULL;
+      argv       = NULL;
+      regparms   = 0;
+
+      /* generate IR to notify event i and possibly the ones
+         immediately following it. */
+      tl_assert(i >= 0 && i < cgs->events_used);
+
+      ev  = &cgs->events[i];
+      ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
+      ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
+      
+      if (DEBUG_CG) {
+         VG_(printf)("   flush "); 
+         showEvent( ev );
+      }
+
+      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
+
+      /* Decide on helper fn to call and args to pass it, and advance
+         i past every event which that one call accounts for. */
+      switch (ev->tag) {
+         case Ev_Ir:
+            /* Merge an Ir with a following Dr/Dm. */
+            if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
+               /* Why is this true?  It's because we're merging an Ir
+                  with a following Dr or Dm.  The Ir derives from the
+                  instruction's IMark and the Dr/Dm from data
+                  references which follow it.  In short it holds
+                  because each insn starts with an IMark, hence an
+                  Ev_Ir, and so these Dr/Dm must pertain to the
+                  immediately preceding Ir.  Same applies to analogous
+                  assertions in the subsequent cases. */
+               tl_assert(ev2->inode == ev->inode);
+               helperName = "log_1I_1Dr_cache_access";
+               helperAddr = &log_1I_1Dr_cache_access;
+               argv = mkIRExprVec_3( i_node_expr,
+                                     get_Event_dea(ev2),
+                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
+               regparms = 3;
+               i += 2;
+            }
+            /* Merge an Ir with a following Dw. */
+            else
+            if (ev2 && ev2->tag == Ev_Dw) {
+               tl_assert(ev2->inode == ev->inode);
+               helperName = "log_1I_1Dw_cache_access";
+               helperAddr = &log_1I_1Dw_cache_access;
+               argv = mkIRExprVec_3( i_node_expr,
+                                     get_Event_dea(ev2),
+                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
+               regparms = 3;
+               i += 2;
+            }
+            /* Merge an Ir with two following Irs. */
+            else
+            if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir)
+            {
+               helperName = "log_3I_0D_cache_access";
+               helperAddr = &log_3I_0D_cache_access;
+               argv = mkIRExprVec_3( i_node_expr, 
+                                     mkIRExpr_HWord( (HWord)ev2->inode ), 
+                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
+               regparms = 3;
+               i += 3;
+            }
+            /* Merge an Ir with one following Ir. */
+            else
+            if (ev2 && ev2->tag == Ev_Ir) {
+               helperName = "log_2I_0D_cache_access";
+               helperAddr = &log_2I_0D_cache_access;
+               argv = mkIRExprVec_2( i_node_expr,
+                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
+               regparms = 2;
+               i += 2;
+            }
+            /* No merging possible; emit as-is. */
+            else {
+               helperName = "log_1I_0D_cache_access";
+               helperAddr = &log_1I_0D_cache_access;
+               argv = mkIRExprVec_1( i_node_expr );
+               regparms = 1;
+               i++;
+            }
+            break;
+         case Ev_Dr:
+         case Ev_Dm:
+            /* Data read or modify (a modify goes through the Dr helper) */
+            helperName = "log_0I_1Dr_cache_access";
+            helperAddr = &log_0I_1Dr_cache_access;
+            argv = mkIRExprVec_3( i_node_expr, 
+                                  get_Event_dea(ev), 
+                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
+            regparms = 3;
+            i++;
+            break;
+         case Ev_Dw:
+            /* Data write */
+            helperName = "log_0I_1Dw_cache_access";
+            helperAddr = &log_0I_1Dw_cache_access;
+            argv = mkIRExprVec_3( i_node_expr,
+                                  get_Event_dea(ev), 
+                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
+            regparms = 3;
+            i++;
+            break;
+         case Ev_Bc:
+            /* Conditional branch */
+            helperName = "log_cond_branch";
+            helperAddr = &log_cond_branch;
+            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
+            regparms = 2;
+            i++;
+            break;
+         case Ev_Bi:
+            /* Branch to an unknown destination */
+            helperName = "log_ind_branch";
+            helperAddr = &log_ind_branch;
+            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
+            regparms = 2;
+            i++;
+            break;
+         default:
+            tl_assert(0);
+      }
+
+      /* Add the helper call to the output SB as a dirty statement. */
+      tl_assert(helperName);
+      tl_assert(helperAddr);
+      tl_assert(argv);
+      di = unsafeIRDirty_0_N( regparms, 
+                              helperName, VG_(fnptr_to_fnentry)( helperAddr ), 
+                              argv );
+      addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
+   }
+
+   cgs->events_used = 0;
+}
+
+static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
+{
+   /* Queue an Ir event for 'inode'; flush first if the list is full. */
+   Event* slot;
+   if (N_EVENTS == cgs->events_used)
+      flushEvents(cgs);
+   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+   slot = &cgs->events[cgs->events_used++];
+   init_Event(slot);
+   slot->tag   = Ev_Ir;
+   slot->inode = inode;
+}
+
+static
+void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
+{
+   /* Queue a data-read event of size 'datasize' at address 'ea'.  The
+      arguments are checked even if cache simulation is off. */
+   Event* slot;
+   tl_assert(isIRAtom(ea));
+   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+   if (!clo_cache_sim) return;
+   if (N_EVENTS == cgs->events_used)
+      flushEvents(cgs);
+   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+   slot = &cgs->events[cgs->events_used++];
+   init_Event(slot);
+   slot->tag       = Ev_Dr;
+   slot->inode     = inode;
+   slot->Ev.Dr.ea  = ea;
+   slot->Ev.Dr.szB = datasize;
+}
+
+static
+void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
+{
+   /* Queue a data-write event, or turn the event before it into a modify
+      when that is a read of the same size and address by the same insn. */
+   Event* lastEvt;
+   Event* evt;
+   tl_assert(isIRAtom(ea));
+   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+   if (!clo_cache_sim)
+      return;
+   /* Is it possible to merge this write with the preceding read?  With an
+      empty list there is none, and no pointer to events[-1] is formed. */
+   lastEvt = cgs->events_used > 0 ? &cgs->events[cgs->events_used-1] : NULL;
+   if (lastEvt != NULL
+    && lastEvt->tag       == Ev_Dr
+    && lastEvt->Ev.Dr.szB == datasize
+    && lastEvt->inode     == inode
+    && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
+   {
+      lastEvt->tag = Ev_Dm;   /* Dr and Dm payloads have the same layout */
+      return;
+   }
+
+   /* No.  Add as normal. */
+   if (cgs->events_used == N_EVENTS)
+      flushEvents(cgs);
+   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+   evt = &cgs->events[cgs->events_used];
+   init_Event(evt);
+   evt->tag       = Ev_Dw;
+   evt->inode     = inode;
+   evt->Ev.Dw.szB = datasize;
+   evt->Ev.Dw.ea  = ea;
+   cgs->events_used++;
+}
+
+static
+void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
+{
+   /* Queue a conditional-branch event.  'guard' must be an atom of host
+      word type; nothing is queued when branch simulation is off. */
+   Event* slot;
+   IRType wordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
+   tl_assert(isIRAtom(guard));
+   tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard) == wordTy);
+   if (!clo_branch_sim) return;
+   if (N_EVENTS == cgs->events_used)
+      flushEvents(cgs);
+   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+   slot = &cgs->events[cgs->events_used++];
+   init_Event(slot);
+   slot->tag         = Ev_Bc;
+   slot->inode       = inode;
+   slot->Ev.Bc.taken = guard;
+}
+
+static
+void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
+{
+   /* Queue an indirect-branch event.  'whereTo' must be an atom of host
+      word type; nothing is queued when branch simulation is off. */
+   Event* slot;
+   IRType wordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
+   tl_assert(isIRAtom(whereTo));
+   tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo) == wordTy);
+   if (!clo_branch_sim) return;
+   if (N_EVENTS == cgs->events_used)
+      flushEvents(cgs);
+   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+   slot = &cgs->events[cgs->events_used++];
+   init_Event(slot);
+   slot->tag       = Ev_Bi;
+   slot->inode     = inode;
+   slot->Ev.Bi.dst = whereTo;
+}
+
+////////////////////////////////////////////////////////////
+
+
+/* Instrumentation callback.  Copies sbIn into a new SB, adding helper
+   calls which feed each insn's events to the cache/branch simulation. */
+static
+IRSB* cg_instrument ( VgCallbackClosure* closure,
+                      IRSB* sbIn, VexGuestLayout* layout,
+                      VexGuestExtents* vge, IRType gWordTy, IRType hWordTy )
+{
+   Int        i, isize;
+   IRStmt*    st;
+   Addr64     cia; /* address of current insn */
+   CgState    cgs;
+   IRTypeEnv* tyenv = sbIn->tyenv;
+   InstrInfo* curr_inode = NULL;
+
+   if (gWordTy != hWordTy) {
+      /* We don't currently support this case. */
+      VG_(tool_panic)("host/guest word size mismatch");
+   }
+
+   // Set up new SB
+   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
+
+   // Copy verbatim any IR preamble preceding the first IMark
+   i = 0;
+   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
+      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
+      i++;
+   }
+
+   // Get the first statement, and initial cia from it
+   tl_assert(sbIn->stmts_used > 0);
+   tl_assert(i < sbIn->stmts_used);
+   st = sbIn->stmts[i];
+   tl_assert(Ist_IMark == st->tag);
+
+   cia   = st->Ist.IMark.addr;
+   isize = st->Ist.IMark.len;
+   // If Vex fails to decode an instruction, the size will be zero.
+   // Pretend otherwise.
+   if (isize == 0) isize = VG_MIN_INSTR_SZB;
+
+   // Set up running state and get block info
+   tl_assert(closure->readdr == vge->base[0]);
+   cgs.events_used = 0;
+   cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
+   cgs.sbInfo_i    = 0;
+
+   if (DEBUG_CG)
+      VG_(printf)("\n\n---------- cg_instrument ----------\n");
+
+   // Traverse the block, initialising inodes, adding events and flushing as
+   // necessary.
+   for (/*use current i*/; i < sbIn->stmts_used; i++) {
+
+      st = sbIn->stmts[i];
+      tl_assert(isFlatIRStmt(st));
+
+      switch (st->tag) {
+         case Ist_NoOp:
+         case Ist_AbiHint:
+         case Ist_Put:
+         case Ist_PutI:
+         case Ist_MBE:
+            break;
+
+         case Ist_IMark:
+            cia   = st->Ist.IMark.addr;
+            isize = st->Ist.IMark.len;
+
+            // If Vex fails to decode an instruction, the size will be zero.
+            // Pretend otherwise.
+            if (isize == 0) isize = VG_MIN_INSTR_SZB;
+
+            // Sanity-check size.
+            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
+                     || VG_CLREQ_SZB == isize );
+
+            // Get space for and init the inode, record it as the current one.
+            // Subsequent Dr/Dw/Dm events from the same instruction will
+            // also use it.
+            curr_inode = setup_InstrInfo(&cgs, cia, isize);
+
+            addEvent_Ir( &cgs, curr_inode );
+            break;
+
+         case Ist_WrTmp: {
+            IRExpr* data = st->Ist.WrTmp.data;
+            if (data->tag == Iex_Load) {
+               IRExpr* aexpr = data->Iex.Load.addr;
+               // Note also, endianness info is ignored.  I guess
+               // that's not interesting.
+               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
+                                  aexpr );
+            }
+            break;
+         }
+
+         case Ist_Store: {
+            IRExpr* data  = st->Ist.Store.data;
+            IRExpr* aexpr = st->Ist.Store.addr;
+            addEvent_Dw( &cgs, curr_inode,
+                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
+            break;
+         }
+
+         case Ist_Dirty: {
+            Int      dataSize;
+            IRDirty* d = st->Ist.Dirty.details;
+            if (d->mFx != Ifx_None) {
+               /* This dirty helper accesses memory.  Collect the details. */
+               tl_assert(d->mAddr != NULL);
+               tl_assert(d->mSize != 0);
+               dataSize = d->mSize;
+               // Large (eg. 28B, 108B, 512B on x86) data-sized
+               // instructions will be done inaccurately, but they're
+               // very rare and this avoids errors from hitting more
+               // than two cache lines in the simulation.
+               if (dataSize > MIN_LINE_SIZE)
+                  dataSize = MIN_LINE_SIZE;
+               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
+                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
+               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
+                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
+            } else {
+               tl_assert(d->mAddr == NULL);
+               tl_assert(d->mSize == 0);
+            }
+            break;
+         }
+
+         case Ist_Exit: {
+            /* Stuff to widen the guard expression to a host word, so
+               we can pass it to the branch predictor simulation
+               functions easily.  tyW is an IR *type*, not an IR op. */
+            Bool     inverted;
+            Addr64   nia, sea;
+            IRConst* dst;
+            IRType   tyW    = hWordTy;
+            IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
+            IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
+            IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
+            IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
+            IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
+            IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
+                                           : IRExpr_Const(IRConst_U64(1));
+
+            /* First we need to figure out whether the side exit got
+               inverted by the ir optimiser.  To do that, figure out
+               the next (fallthrough) instruction's address and the
+               side exit address and see if they are the same. */
+            nia = cia + (Addr64)isize;
+            if (tyW == Ity_I32)
+               nia &= 0xFFFFFFFFULL;
+
+            /* Side exit address */
+            dst = st->Ist.Exit.dst;
+            if (tyW == Ity_I32) {
+               tl_assert(dst->tag == Ico_U32);
+               sea = (Addr64)(UInt)dst->Ico.U32;
+            } else {
+               tl_assert(tyW == Ity_I64);
+               tl_assert(dst->tag == Ico_U64);
+               sea = dst->Ico.U64;
+            }
+
+            inverted = nia == sea;
+
+            /* Widen the guard expression. */
+            addStmtToIRSB( cgs.sbOut,
+                           IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
+            addStmtToIRSB( cgs.sbOut,
+                           IRStmt_WrTmp( guardW,
+                                         IRExpr_Unop(widen,
+                                                     IRExpr_RdTmp(guard1))) );
+            /* If the exit is inverted, invert the sense of the guard. */
+            addStmtToIRSB(
+               cgs.sbOut,
+               IRStmt_WrTmp(
+                  guard,
+                  inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
+                           : IRExpr_RdTmp(guardW)
+               ));
+            /* And post the event. */
+            addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
+
+            /* We may never reach the next statement, so need to flush
+               all outstanding transactions now. */
+            flushEvents( &cgs );
+            break;
+         }
+
+         default:
+            tl_assert(0);
+            break;
+      }
+
+      /* Copy the original statement */
+      addStmtToIRSB( cgs.sbOut, st );
+
+      if (DEBUG_CG) {
+         ppIRStmt(st);
+         VG_(printf)("\n");
+      }
+   }
+
+   /* Deal with branches to unknown destinations.  Except ignore ones
+      which are function returns as we assume the return stack
+      predictor never mispredicts. */
+   if (sbIn->jumpkind == Ijk_Boring) {
+      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
+      switch (sbIn->next->tag) {
+         case Iex_Const:
+            break; /* boring - branch to known address */
+         case Iex_RdTmp:
+            /* looks like an indirect branch (branch to unknown) */
+            addEvent_Bi( &cgs, curr_inode, sbIn->next );
+            break;
+         default:
+            /* shouldn't happen - if the incoming IR is properly
+               flattened, should only have tmp and const cases to
+               consider. */
+            tl_assert(0);
+      }
+   }
+
+   /* At the end of the bb.  Flush outstandings. */
+   flushEvents( &cgs );
+
+   /* done.  stay sane ... */
+   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
+
+   if (DEBUG_CG) {
+      VG_(printf)( "goto {");
+      ppIRJumpKind(sbIn->jumpkind);
+      VG_(printf)( "} ");
+      ppIRExpr( sbIn->next );
+      VG_(printf)( "}\n");
+   }
+
+   return cgs.sbOut;
+}
+
+/*------------------------------------------------------------*/
+/*--- Cache configuration                                  ---*/
+/*------------------------------------------------------------*/
+
+#define UNDEFINED_CACHE     { -1, -1, -1 }
+/* Cache configs from the command line; -1 fields mean "not given". */
+static cache_t clo_I1_cache = UNDEFINED_CACHE;
+static cache_t clo_D1_cache = UNDEFINED_CACHE;
+static cache_t clo_L2_cache = UNDEFINED_CACHE;
+
+/* Checks that a cache configuration is one the simulator can handle.  If
+   not, prints an error naming the cache and exits; nothing is adjusted. */
+static
+void check_cache(cache_t* cache, Char *name)
+{
+   if (cache->size <= 0 || cache->assoc <= 0 || cache->line_size <= 0) {
+      /* Checked first: the set-count test below divides by these. */
+      VG_UMSG("error: %s cache parameters must be positive; aborting.", name);
+   }
+   else if (( cache->size % (cache->line_size * cache->assoc) != 0) ||
+       (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc))) {
+      /* Simulator requires line size and set count to be powers of two */
+      VG_UMSG("error: %s set count not a power of two; aborting.", name);
+   }
+   else if (-1 == VG_(log2)(cache->line_size)) {
+      VG_UMSG("error: %s line size of %dB not a power of two; aborting.",
+              name, cache->line_size);
+   }
+   else if (cache->line_size < MIN_LINE_SIZE) {
+      // Line size must be >= MIN_LINE_SIZE -- any smaller and a single
+      // instruction could straddle three cache lines, which breaks a
+      // simulation assertion and is stupid anyway.
+      VG_UMSG("error: %s line size of %dB too small; aborting.",
+              name, cache->line_size);
+   }
+   else if (cache->size <= cache->line_size) {
+      /* Cache size must be > line size (causes seg faults if not). */
+      VG_UMSG("error: %s cache size of %dB <= line size of %dB; aborting.",
+              name, cache->size, cache->line_size);
+   }
+   else if (cache->assoc > (cache->size / cache->line_size)) {
+      /* Need assoc <= (size / line size) (seg faults otherwise). */
+      VG_UMSG("error: %s associativity > (size / line size); aborting.",
+              name);
+   }
+   else {
+      return;   /* everything checks out */
+   }
+   VG_(exit)(1);
+}
+
+static
+void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+/* True if any field of command-line cache config L has been set. */
+#define DEFINED(L) (-1 != (L).size || -1 != (L).assoc || -1 != (L).line_size)
+   Int n_clos = 0;
+
+   // Count how many were defined on the command line.
+   if (DEFINED(clo_I1_cache)) { n_clos++; }
+   if (DEFINED(clo_D1_cache)) { n_clos++; }
+   if (DEFINED(clo_L2_cache)) { n_clos++; }
+
+   // Set the cache config (using auto-detection, if supported by the
+   // architecture)
+   VG_(configure_caches)( I1c, D1c, L2c, (3 == n_clos) );
+
+   // Then replace with any defined on the command line.
+   if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
+   if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
+   if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }
+
+   // Then check values; check_cache exits if any is not acceptable.
+   check_cache(I1c, "I1");
+   check_cache(D1c, "D1");
+   check_cache(L2c, "L2");
+
+   if (VG_(clo_verbosity) >= 2) {
+      VG_UMSG("Cache configuration used:");
+      VG_UMSG("  I1: %dB, %d-way, %dB lines",
+              I1c->size, I1c->assoc, I1c->line_size);
+      VG_UMSG("  D1: %dB, %d-way, %dB lines",
+              D1c->size, D1c->assoc, D1c->line_size);
+      VG_UMSG("  L2: %dB, %d-way, %dB lines",
+              L2c->size, L2c->assoc, L2c->line_size);
+   }
+#undef DEFINED
+}
+
+/*------------------------------------------------------------*/
+/*--- cg_fini() and related functions                      ---*/
+/*------------------------------------------------------------*/
+
+// Total reads/writes/misses.  Calculated during CC traversal at the end.
+// All auto-zeroed.
+static CacheCC  Ir_total;   // instruction reads     (fields a, m1, m2)
+static CacheCC  Dr_total;   // data reads            (fields a, m1, m2)
+static CacheCC  Dw_total;   // data writes           (fields a, m1, m2)
+static BranchCC Bc_total;   // conditional branches  (fields b, mp)
+static BranchCC Bi_total;   // indirect branches     (fields b, mp)
+
+static void fprint_CC_table_and_calc_totals(void)
+{
+   Int     i, fd;
+   SysRes  sres;
+   Char    buf[512], *currFile = NULL, *currFn = NULL;
+   LineCC* lineCC;
+   // Write the profile to the output file, summing the *_total counters on
+   // the way.  Setup output filename.  Nb: it's important to do this now, ie.
+   // as late as possible.  If we do it at start-up and the program forks and
+   // the output file format string contains a %p (pid) specifier, both the
+   // parent and child will incorrectly write to the same file;  this
+   // happened in 3.3.0.
+   Char* cachegrind_out_file =
+      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
+
+   sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
+                                         VKI_S_IRUSR|VKI_S_IWUSR);
+   if (sres.isError) {
+      // If the file can't be opened for whatever reason (conflict
+      // between multiple cachegrinded processes?), give up now.
+      VG_UMSG("error: can't open cache simulation output file '%s'",
+              cachegrind_out_file );
+      VG_UMSG("       ... so simulation results will be missing.");
+      VG_(free)(cachegrind_out_file);
+      return;
+   } else {
+      fd = sres.res;
+      VG_(free)(cachegrind_out_file);
+   }
+
+   // "desc:" lines (giving I1/D1/L2 cache configuration).  The spaces after
+   // the 2nd colon makes cg_annotate's output look nicer.
+   VG_(sprintf)(buf, "desc: I1 cache:         %s\n"
+                     "desc: D1 cache:         %s\n"
+                     "desc: L2 cache:         %s\n",
+                     I1.desc_line, D1.desc_line, L2.desc_line);
+   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+   // "cmd:" line: the executable name, then each non-NULL client argument.
+   VG_(strcpy)(buf, "cmd:");
+   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+   if (VG_(args_the_exename)) {
+      VG_(write)(fd, " ", 1);
+      VG_(write)(fd, VG_(args_the_exename), 
+                     VG_(strlen)( VG_(args_the_exename) ));
+   }
+   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
+      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
+      if (arg) {
+         VG_(write)(fd, " ", 1);
+         VG_(write)(fd, arg, VG_(strlen)( arg ));
+      }
+   }
+   // "events:" line (its leading "\n" also terminates the "cmd:" line)
+   if (clo_cache_sim && clo_branch_sim) {
+      VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
+                                  "Bc Bcm Bi Bim\n");
+   }
+   else if (clo_cache_sim && !clo_branch_sim) {
+      VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
+                                  "\n");
+   }
+   else if (!clo_cache_sim && clo_branch_sim) {
+      VG_(sprintf)(buf, "\nevents: Ir "
+                                  "Bc Bcm Bi Bim\n");
+   }
+   else
+      tl_assert(0); /* can't happen */
+
+   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+   // Traverse every lineCC
+   VG_(OSetGen_ResetIter)(CC_table);
+   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
+      Bool just_hit_a_new_file = False;
+      // If we've hit a new file, print a "fl=" line.  Because each string
+      // is stored exactly once in the string table, pointer comparison can
+      // replace strcmp() (which would usually have to scan whole strings).
+      // NOTE(review): buf is 512 bytes and VG_(sprintf) is unbounded, so this
+      // assumes file/function names are shorter than that -- TODO confirm.
+      if ( lineCC->loc.file != currFile ) {
+         currFile = lineCC->loc.file;
+         VG_(sprintf)(buf, "fl=%s\n", currFile);
+         VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+         distinct_files++;
+         just_hit_a_new_file = True;
+      }
+      // If we've hit a new function, print a "fn=" line.  We know to do
+      // this when the function name changes, and also every time we hit a
+      // new file (in which case the new function name might be the same as
+      // in the old file, hence the just_hit_a_new_file test).
+      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
+         currFn = lineCC->loc.fn;
+         VG_(sprintf)(buf, "fn=%s\n", currFn);
+         VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+         distinct_fns++;
+      }
+
+      // Print the LineCC: line number, then the columns named in "events:".
+      if (clo_cache_sim && clo_branch_sim) {
+         VG_(sprintf)(buf, "%u %llu %llu %llu"
+                             " %llu %llu %llu"
+                             " %llu %llu %llu"
+                             " %llu %llu %llu %llu\n",
+                            lineCC->loc.line,
+                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2, 
+                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
+                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2,
+                            lineCC->Bc.b, lineCC->Bc.mp, 
+                            lineCC->Bi.b, lineCC->Bi.mp);
+      }
+      else if (clo_cache_sim && !clo_branch_sim) {
+         VG_(sprintf)(buf, "%u %llu %llu %llu"
+                             " %llu %llu %llu"
+                             " %llu %llu %llu\n",
+                            lineCC->loc.line,
+                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2, 
+                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
+                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2);
+      }
+      else if (!clo_cache_sim && clo_branch_sim) {
+         VG_(sprintf)(buf, "%u %llu"
+                             " %llu %llu %llu %llu\n",
+                            lineCC->loc.line,
+                            lineCC->Ir.a, 
+                            lineCC->Bc.b, lineCC->Bc.mp, 
+                            lineCC->Bi.b, lineCC->Bi.mp);
+      }
+      else
+         tl_assert(0);
+
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+      // Update summary stats (all of them, whichever simulation is enabled)
+      Ir_total.a  += lineCC->Ir.a;
+      Ir_total.m1 += lineCC->Ir.m1;
+      Ir_total.m2 += lineCC->Ir.m2;
+      Dr_total.a  += lineCC->Dr.a;
+      Dr_total.m1 += lineCC->Dr.m1;
+      Dr_total.m2 += lineCC->Dr.m2;
+      Dw_total.a  += lineCC->Dw.a;
+      Dw_total.m1 += lineCC->Dw.m1;
+      Dw_total.m2 += lineCC->Dw.m2;
+      Bc_total.b  += lineCC->Bc.b;
+      Bc_total.mp += lineCC->Bc.mp;
+      Bi_total.b  += lineCC->Bi.b;
+      Bi_total.mp += lineCC->Bi.mp;
+
+      distinct_lines++;
+   }
+
+   // Summary stats must come after rest of table, since we calculate them
+   // during traversal.
+   if (clo_cache_sim && clo_branch_sim) {
+      VG_(sprintf)(buf, "summary:"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu %llu\n", 
+                        Ir_total.a, Ir_total.m1, Ir_total.m2,
+                        Dr_total.a, Dr_total.m1, Dr_total.m2,
+                        Dw_total.a, Dw_total.m1, Dw_total.m2,
+                        Bc_total.b, Bc_total.mp, 
+                        Bi_total.b, Bi_total.mp);
+   }
+   else if (clo_cache_sim && !clo_branch_sim) {
+      VG_(sprintf)(buf, "summary:"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu\n",
+                        Ir_total.a, Ir_total.m1, Ir_total.m2,
+                        Dr_total.a, Dr_total.m1, Dr_total.m2,
+                        Dw_total.a, Dw_total.m1, Dw_total.m2);
+   }
+   else if (!clo_cache_sim && clo_branch_sim) {
+      VG_(sprintf)(buf, "summary:"
+                        " %llu"
+                        " %llu %llu %llu %llu\n", 
+                        Ir_total.a,
+                        Bc_total.b, Bc_total.mp, 
+                        Bi_total.b, Bi_total.mp);
+   }
+   else
+      tl_assert(0);
+
+   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+   VG_(close)(fd);
+}
+
+// Number of columns needed to print n in decimal when a comma separates
+// each group of three digits: the digit count plus one column per comma.
+// Zero counts as one digit.
+static UInt ULong_width(ULong n)
+{
+   UInt digits = 1;                // even 0 takes one digit
+   for (n /= 10; n != 0; n /= 10)
+      digits++;
+   return digits + (digits-1)/3;   // one comma per complete group of 3
+}
+
+// Tool finalisation hook (registered via VG_(basic_tool_funcs)): writes the
+// output file, then prints the summary to the user unless verbosity is 0.
+// exitcode is not used.
+static void cg_fini(Int exitcode)
+{
+   static Char buf1[128], buf2[128], buf3[128], buf4[128], fmt[128];
+
+   CacheCC  D_total;
+   BranchCC B_total;
+   ULong L2_total_m, L2_total_mr, L2_total_mw,
+         L2_total, L2_total_r, L2_total_w;
+   Int l1, l2, l3;
+
+   /* Running with both cache and branch simulation disabled is not
+      allowed (checked during command line option processing). */
+   tl_assert(clo_cache_sim || clo_branch_sim);
+
+   fprint_CC_table_and_calc_totals();
+
+   if (VG_(clo_verbosity) == 0) return;
+
+   #define MAX(a, b)  ((a) >= (b) ? (a) : (b))
+
+   /* Column widths: l1 from the I refs, l2/l3 from the rd|cond / wr|ind. */
+   l1 = ULong_width(Ir_total.a);
+   l2 = ULong_width(MAX(Dr_total.a, Bc_total.b));
+   l3 = ULong_width(MAX(Dw_total.a, Bi_total.b));
+
+   /* Make format string, getting width right for numbers */
+   VG_(sprintf)(fmt, "%%s %%,%dllu", l1);
+
+   /* Always print this */
+   VG_UMSG(fmt, "I   refs:     ", Ir_total.a);
+
+   /* If cache profiling is enabled, show D access numbers and all
+      miss numbers */
+   if (clo_cache_sim) {
+      VG_UMSG(fmt, "I1  misses:   ", Ir_total.m1);
+      VG_UMSG(fmt, "L2i misses:   ", Ir_total.m2);
+
+      if (0 == Ir_total.a) Ir_total.a = 1;
+      VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
+      VG_UMSG("I1  miss rate: %s", buf1);
+
+      VG_(percentify)(Ir_total.m2, Ir_total.a, 2, l1+1, buf1);
+      VG_UMSG("L2i miss rate: %s", buf1);
+      VG_UMSG("");
+
+      /* D cache results.  Use the D_refs.rd and D_refs.wr values to
+       * determine the width of columns 2 & 3. */
+      D_total.a  = Dr_total.a  + Dw_total.a;
+      D_total.m1 = Dr_total.m1 + Dw_total.m1;
+      D_total.m2 = Dr_total.m2 + Dw_total.m2;
+
+      /* Make format string, getting width right for numbers */
+      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)", l1, l2, l3);
+
+      VG_UMSG(fmt, "D   refs:     ", 
+                   D_total.a, Dr_total.a, Dw_total.a);
+      VG_UMSG(fmt, "D1  misses:   ",
+                   D_total.m1, Dr_total.m1, Dw_total.m1);
+      VG_UMSG(fmt, "L2d misses:   ",
+                   D_total.m2, Dr_total.m2, Dw_total.m2);
+
+      if (0 == D_total.a)  D_total.a = 1;
+      if (0 == Dr_total.a) Dr_total.a = 1;
+      if (0 == Dw_total.a) Dw_total.a = 1;
+      VG_(percentify)( D_total.m1,  D_total.a, 1, l1+1, buf1);
+      VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
+      VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
+      VG_UMSG("D1  miss rate: %s (%s     + %s  )", buf1, buf2,buf3);
+
+      VG_(percentify)( D_total.m2,  D_total.a, 1, l1+1, buf1);
+      VG_(percentify)(Dr_total.m2, Dr_total.a, 1, l2+1, buf2);
+      VG_(percentify)(Dw_total.m2, Dw_total.a, 1, l3+1, buf3);
+      VG_UMSG("L2d miss rate: %s (%s     + %s  )", buf1, buf2,buf3);
+      VG_UMSG("");
+
+      /* L2 overall results */
+
+      L2_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
+      L2_total_r = Dr_total.m1 + Ir_total.m1;
+      L2_total_w = Dw_total.m1;
+      VG_UMSG(fmt, "L2 refs:      ",
+                   L2_total, L2_total_r, L2_total_w);
+
+      L2_total_m  = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
+      L2_total_mr = Dr_total.m2 + Ir_total.m2;
+      L2_total_mw = Dw_total.m2;
+      VG_UMSG(fmt, "L2 misses:    ",
+                   L2_total_m, L2_total_mr, L2_total_mw);
+
+      VG_(percentify)(L2_total_m,  (Ir_total.a + D_total.a),  1, l1+1, buf1);
+      VG_(percentify)(L2_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
+      VG_(percentify)(L2_total_mw, Dw_total.a,                1, l3+1, buf3);
+      VG_UMSG("L2 miss rate:  %s (%s     + %s  )", buf1, buf2,buf3);
+   }
+
+   /* If branch profiling is enabled, show branch overall results. */
+   if (clo_branch_sim) {
+      /* Make format string, getting width right for numbers */
+      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)", l1, l2, l3);
+
+      B_total.b  = Bc_total.b  + Bi_total.b;
+      B_total.mp = Bc_total.mp + Bi_total.mp;
+
+      VG_UMSG("");
+      VG_UMSG(fmt, "Branches:     ",
+                   B_total.b, Bc_total.b, Bi_total.b);
+      VG_UMSG(fmt, "Mispredicts:  ",
+                   B_total.mp, Bc_total.mp, Bi_total.mp);
+
+      /* Only now, with the true counts printed, avoid dividing by zero. */
+      if (0 == B_total.b)   B_total.b  = 1;
+      if (0 == Bc_total.b)  Bc_total.b = 1;
+      if (0 == Bi_total.b)  Bi_total.b = 1;
+      VG_(percentify)(B_total.mp,  B_total.b,  1, l1+1, buf1);
+      VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
+      VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);
+      VG_UMSG("Mispred rate:  %s (%s     + %s   )", buf1, buf2,buf3);
+   }
+
+   // Various stats
+   if (VG_(clo_verbosity) > 1) {
+      Int debug_lookups = full_debugs + fn_debugs + file_line_debugs + no_debugs;
+
+      VG_DMSG("");
+      VG_DMSG("cachegrind: distinct files: %d", distinct_files);
+      VG_DMSG("cachegrind: distinct fns:   %d", distinct_fns);
+      VG_DMSG("cachegrind: distinct lines: %d", distinct_lines);
+      VG_DMSG("cachegrind: distinct instrs:%d", distinct_instrs);
+      VG_DMSG("cachegrind: debug lookups      : %d", debug_lookups);
+      
+      VG_(percentify)(full_debugs,      debug_lookups, 1, 6, buf1);
+      VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
+      VG_(percentify)(fn_debugs,        debug_lookups, 1, 6, buf3);
+      VG_(percentify)(no_debugs,        debug_lookups, 1, 6, buf4);
+      VG_DMSG("cachegrind: with full      info:%s (%d)", 
+              buf1, full_debugs);
+      VG_DMSG("cachegrind: with file/line info:%s (%d)", 
+              buf2, file_line_debugs);
+      VG_DMSG("cachegrind: with fn name   info:%s (%d)", 
+              buf3, fn_debugs);
+      VG_DMSG("cachegrind: with zero      info:%s (%d)", 
+              buf4, no_debugs);
+
+      VG_DMSG("cachegrind: string table size: %lu",
+              VG_(OSetGen_Size)(stringTable));
+      VG_DMSG("cachegrind: CC table size: %lu",
+              VG_(OSetGen_Size)(CC_table));
+      VG_DMSG("cachegrind: InstrInfo table size: %lu",
+              VG_(OSetGen_Size)(instrInfoTable));
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Discarding BB info                                           ---*/
+/*--------------------------------------------------------------------*/
+
+// Superblock-discard hook (see VG_(needs_superblock_discards)): called when
+// a translation is removed from the translation cache for any reason at
+// all -- to free up space, because the guest code was unmapped or
+// modified, or for any arbitrary reason.  Drops the block's SB_info.
+static
+void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
+{
+   Addr     key = (Addr)vge.base[0];
+   SB_info* doomed;
+
+   tl_assert(vge.n_used > 0);
+   if (DEBUG_CG)
+      VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n", 
+                   (void*)(Addr)key,
+                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
+
+   // Unlink the entry from instrInfoTable and free it.  The lookup key is
+   // the first extent's base address; orig_addr64 is not consulted.
+   doomed = VG_(OSetGen_Remove)(instrInfoTable, &key);
+   tl_assert(NULL != doomed);
+   VG_(OSetGen_FreeNode)(instrInfoTable, doomed);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Command line processing                                      ---*/
+/*--------------------------------------------------------------------*/
+
+// Parse "<size>,<assoc>,<line_size>" (e.g. "65536,2,64") into *cache.  Bad
+// syntax, or a value changed by narrowing to Int, goes to VG_(err_bad_option).
+static void parse_cache_opt ( cache_t* cache, Char* opt )
+{
+   Long  size = 0, assoc = 0, line_size = 0;
+   Char* rest;
+   Bool  well_formed = False;
+   size = VG_(strtoll10)(opt, &rest);
+   if (*rest == ',') {
+      assoc = VG_(strtoll10)(rest+1, &rest);
+      if (*rest == ',') {
+         line_size   = VG_(strtoll10)(rest+1, &rest);
+         well_formed = (*rest == '\0');
+      }
+   }
+   if (well_formed) {
+      cache->size      = (Int)size;
+      cache->assoc     = (Int)assoc;
+      cache->line_size = (Int)line_size;
+      if (cache->size == size && cache->assoc == assoc
+          && cache->line_size == line_size)
+         return;
+      VG_UMSG("one of the cache parameters was too large and overflowed\n");
+   }
+   // XXX: the message omits the "--I1/D1/L2=" part, but that's no big deal.
+   VG_(err_bad_option)(opt);
+}
+
+static Bool cg_process_cmd_line_option(Char* arg)
+{
+   Char* tmp_str;
+   // Returns True iff arg is a Cachegrind option.  The VG_*_CLO macros
+   // presumably match "<name>=<value>" and store <value> in their last arg.
+   if      VG_STR_CLO(arg, "--I1", tmp_str)
+      parse_cache_opt(&clo_I1_cache, tmp_str);
+   else if VG_STR_CLO(arg, "--D1", tmp_str)
+      parse_cache_opt(&clo_D1_cache, tmp_str);
+   else if VG_STR_CLO(arg, "--L2", tmp_str)
+      parse_cache_opt(&clo_L2_cache, tmp_str);
+
+   else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
+   else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
+   else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
+   else
+      return False;
+
+   return True;
+}
+
+static void cg_print_usage(void)
+{
+   VG_(printf)(      // usage text for the tool's own options ("%%" prints "%")
+"    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
+"    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
+"    --L2=<size>,<assoc>,<line_size>  set L2 cache manually\n"
+"    --cache-sim=yes|no  [yes]        collect cache stats?\n"
+"    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
+"    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
+   );
+}
+
+static void cg_print_debug_usage(void)
+{
+   VG_(printf)(      // the tool has no debugging options to describe
+"    (none)\n"
+   );
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Setup                                                        ---*/
+/*--------------------------------------------------------------------*/
+
+static void cg_post_clo_init(void); /* just below */
+
+static void cg_pre_clo_init(void)   // named in VG_DETERMINE_INTERFACE_VERSION
+{
+   VG_(details_name)            ("Cachegrind");
+   VG_(details_version)         (NULL);
+   VG_(details_description)     ("a cache and branch-prediction profiler");
+   VG_(details_copyright_author)(
+      "Copyright (C) 2002-2009, and GNU GPL'd, by Nicholas Nethercote et al.");
+   VG_(details_bug_reports_to)  (VG_BUGS_TO);
+   VG_(details_avg_translation_sizeB) ( 500 );
+   // Main callbacks: post-option init, instrumentation and finalisation.
+   VG_(basic_tool_funcs)          (cg_post_clo_init,
+                                   cg_instrument,
+                                   cg_fini);
+   // Extra hooks: superblock-discard notification and tool-specific options.
+   VG_(needs_superblock_discards)(cg_discard_superblock_info);
+   VG_(needs_command_line_options)(cg_process_cmd_line_option,
+                                   cg_print_usage,
+                                   cg_print_debug_usage);
+}
+
+// Post-option initialisation hook (passed to VG_(basic_tool_funcs)): rejects
+// the "nothing to simulate" setup, creates the three tables, sets up caches.
+static void cg_post_clo_init(void)
+{
+   cache_t I1c, D1c, L2c;
+
+   /* At least one of the two simulations has to be enabled. */
+   if (!(clo_cache_sim || clo_branch_sim)) {
+      VG_UMSG("ERROR: --cache-sim=no --branch-sim=no is not allowed.");
+      VG_UMSG("You must select cache profiling, or branch profiling, or both.");
+      VG_(exit)(2);
+   }
+
+   /* Key lives at the offset of LineCC's 'loc' field. */
+   CC_table = VG_(OSetGen_Create)(offsetof(LineCC, loc), cmp_CodeLoc_LineCC,
+                                  VG_(malloc), "cg.main.cpci.1", VG_(free));
+
+   /* Key at offset 0; no comparison function is supplied. */
+   instrInfoTable = VG_(OSetGen_Create)(/*keyOff*/0, NULL,
+                                        VG_(malloc), "cg.main.cpci.2",
+                                        VG_(free));
+
+   /* Key at offset 0, compared with stringCmp. */
+   stringTable = VG_(OSetGen_Create)(/*keyOff*/0, stringCmp,
+                                     VG_(malloc), "cg.main.cpci.3",
+                                     VG_(free));
+
+   /* Settle the cache parameters, then hand them to the simulators. */
+   configure_caches(&I1c, &D1c, &L2c);
+   cachesim_I1_initcache(I1c);
+   cachesim_D1_initcache(D1c);
+   cachesim_L2_initcache(L2c);
+}
+
+VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/cachegrind/.svn/text-base/cg_merge.c.svn-base b/cachegrind/.svn/text-base/cg_merge.c.svn-base
new file mode 100644
index 0000000..1d8ad41
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg_merge.c.svn-base
@@ -0,0 +1,1571 @@
+
+/*--------------------------------------------------------------------*/
+/*--- A program that merges multiple cachegrind output files.      ---*/
+/*---                                                   cg_merge.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+  This file is part of Cachegrind, a Valgrind tool for cache
+  profiling programs.
+
+  Copyright (C) 2002-2009 Nicholas Nethercote
+     njn@valgrind.org
+
+  AVL tree code derived from
+  ANSI C Library for maintainance of AVL Balanced Trees
+  (C) 2000 Daniel Nagy, Budapest University of Technology and Economics
+  Released under GNU General Public License (GPL) version 2
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+
+typedef  signed long   Word;
+typedef  unsigned long UWord;
+typedef  unsigned char Bool;
+#define True ((Bool)1)
+#define False ((Bool)0)
+typedef  signed int    Int;
+typedef  unsigned int  UInt;
+typedef  unsigned long long int ULong;
+typedef  signed char   Char;
+typedef  size_t        SizeT;
+
+
+//------------------------------------------------------------------//
+//---                           WordFM                           ---//
+//---                      Public interface                      ---//
+//------------------------------------------------------------------//
+
+typedef  struct _WordFM  WordFM; /* opaque */
+
+/* Initialise a WordFM */
+void initFM ( WordFM* t, 
+              void*   (*alloc_nofail)( SizeT ),
+              void    (*dealloc)(void*),
+              Word    (*kCmp)(Word,Word) );
+
+/* Allocate and initialise a WordFM */
+WordFM* newFM( void* (*alloc_nofail)( SizeT ),
+               void  (*dealloc)(void*),
+               Word  (*kCmp)(Word,Word) );
+
+/* Free up the FM.  If kFin is non-NULL, it is applied to keys
+   before the FM is deleted; ditto with vFin for vals. */
+void deleteFM ( WordFM*, void(*kFin)(Word), void(*vFin)(Word) );
+
+/* Add (k,v) to fm.  If a binding for k already exists, it is updated
+   to map to this new v.  In that case we should really return the
+   previous v so that caller can finalise it.  Oh well. */
+void addToFM ( WordFM* fm, Word k, Word v );
+
+// Delete key from fm, returning associated val if found
+Bool delFromFM ( WordFM* fm, /*OUT*/Word* oldV, Word key );
+
+// Look up in fm, assigning found val at spec'd address
+Bool lookupFM ( WordFM* fm, /*OUT*/Word* valP, Word key );
+
+Word sizeFM ( WordFM* fm );
+
+// set up FM for iteration
+void initIterFM ( WordFM* fm );
+
+// get next key/val pair.  Will assert if fm has been modified
+// or looked up in since initIterFM was called.
+Bool nextIterFM ( WordFM* fm, /*OUT*/Word* pKey, /*OUT*/Word* pVal );
+
+// clear the I'm iterating flag
+void doneIterFM ( WordFM* fm );
+
+// Deep copy a FM.  If dopyK is NULL, keys are copied verbatim.
+// If non-null, dopyK is applied to each key to generate the
+// version in the new copy.  In that case, if the argument to dopyK
+// is non-NULL but the result is NULL, it is assumed that dopyK
+// could not allocate memory, in which case the copy is abandoned
+// and NULL is returned.  Ditto with dopyV for values.
+WordFM* dopyFM ( WordFM* fm, Word(*dopyK)(Word), Word(*dopyV)(Word) );
+
+//------------------------------------------------------------------//
+//---                         end WordFM                         ---//
+//---                      Public interface                      ---//
+//------------------------------------------------------------------//
+
+
+static char* argv0 = "cg_merge";
+
+/* Keep track of source filename/line no so as to be able to
+   print decent error messages. */
+typedef
+   struct {
+      FILE* fp;        // stream that readline() reads from
+      UInt  lno;       // line counter; readline() bumps it per newline read
+      char* filename;  // name shown by printSrcLoc() in diagnostics
+   }
+   SOURCE;
+
+static void printSrcLoc ( SOURCE* s )   // report the input position on stderr
+{  // NOTE(review): lno-1 presumes lno runs one ahead of the line -- confirm
+   fprintf(stderr, "%s: near %s line %u\n", argv0, s->filename, s->lno-1);
+}
+
+__attribute__((noreturn))
+static void mallocFail ( SOURCE* s, char* who )   // fatal out-of-memory report
+{
+   fprintf(stderr, "%s: out of memory in %s\n", argv0, who );
+   printSrcLoc( s );
+   exit(2);   // status 2 here; parseError() and barf() exit with 1
+}
+
+__attribute__((noreturn))
+static void parseError ( SOURCE* s, char* msg )   // fatal input-syntax report
+{
+   fprintf(stderr, "%s: parse error: %s\n", argv0, msg );
+   printSrcLoc( s );
+   exit(1);   // never returns to the caller
+}
+
+__attribute__((noreturn))
+static void barf ( SOURCE* s, char* msg )   // fatal report for any other error
+{
+   fprintf(stderr, "%s: %s\n", argv0, msg );
+   printSrcLoc( s );
+   exit(1);   // never returns to the caller
+}
+
+// Read a line
+#define M_LINEBUF 40960
+static char line[M_LINEBUF];
+
+// Read the next input line into the global 'line' buffer, without its
+// '\n', bumping s->lno for each newline consumed.  Returns True iff the
+// resulting string is non-empty, so EOF and an empty line both give False.
+static Bool readline ( SOURCE* s )
+{
+   int c, len = 0;
+   line[0] = 0;
+   for (;;) {
+      if (len >= M_LINEBUF-10)
+         parseError(s, "Unexpected long line in input file");
+
+      c = getc(s->fp);
+      if (c == EOF) {
+         if (!ferror(s->fp))
+            break;                 // plain end of file
+         perror(argv0);
+         barf(s, "I/O error while reading input file");   // never returns
+      }
+
+      line[len++] = c;
+      line[len]   = 0;
+      if (c == '\n') {
+         line[len-1] = 0;          // drop the newline itself
+         s->lno++;
+         break;
+      }
+   }
+   return line[0] != 0;
+}
+
+static Bool streqn ( char* s1, char* s2, size_t n )
+{  // True iff s1 and s2 agree over their first n characters (as strncmp).
+   return strncmp(s1, s2, n) == 0;
+}
+
+static Bool streq ( char* s1, char* s2 )
+{  // True iff the two strings are identical (as strcmp).
+   return strcmp(s1, s2) == 0;
+}
+
+
+////////////////////////////////////////////////////////////////
+
+typedef
+   struct {
+      char* fi_name;   // file name ("fl=" on output); freed by ddel_FileFn
+      char* fn_name;   // function name ("fn=");     freed by ddel_FileFn
+   }
+   FileFn;
+
+typedef
+   struct {
+      Int n_counts;    // number of entries in counts[]
+      ULong* counts;   // malloc'd array; freed by ddel_Counts, not sdel_Counts
+   }
+   Counts;
+
+typedef
+   struct {
+      // null-terminated vector of desc_lines (each entry separately freed)
+      char** desc_lines;
+
+      // Cmd line (printed verbatim by show_CacheProfFile)
+      char* cmd_line;
+
+      // Events line (printed verbatim by show_CacheProfFile)
+      char* events_line;
+      Int   n_events;   // presumably the number of events named in events_line
+
+      // Summary line (copied from input)
+      char* summary_line;
+
+      /* Outermost map is
+            WordFM FileFn* innerMap
+         where innerMap is   WordFM line-number=UWord Counts */
+      WordFM* outerMap;
+
+      // Summary counts (computed whilst parsing)
+      // should match .summary_line
+      Counts* summary;
+   }
+   CacheProfFile;
+
+// Wrap the two strings (stored as-is, not copied) in a new FileFn; NULL on OOM.
+static FileFn* new_FileFn ( char* file_name, char* fn_name )
+{
+   FileFn* node = malloc(sizeof *node);
+   if (!node) return NULL;
+   node->fi_name = file_name;
+   node->fn_name = fn_name;
+   return node;
+}
+
+// Deep delete: frees both name strings, then the FileFn itself.
+// ffn must not be NULL.
+static void ddel_FileFn ( FileFn* ffn )
+{
+   free(ffn->fi_name);            // free(NULL) is a no-op, so no NULL tests
+   free(ffn->fn_name);
+   memset(ffn, 0, sizeof *ffn);   // wipe the now-dangling pointers
+   free(ffn);
+}
+
+static FileFn* dopy_FileFn ( FileFn* ff )   // deep copy; NULL if out of memory
+{
+   char*   fi2 = strdup(ff->fi_name);
+   char*   fn2 = strdup(ff->fn_name);
+   FileFn* res = (fi2 && fn2) ? new_FileFn( fi2, fn2 ) : NULL;
+   if (!res) { free(fi2); free(fn2); }   // don't leak a partial copy
+   return res;
+}
+
+// Make a Counts owning a private copy of counts[0 .. n_counts-1];
+// returns NULL (leaking nothing) if out of memory.
+static Counts* new_Counts ( Int n_counts, /*COPIED*/ULong* counts )
+{
+   Counts* cts;
+   assert(n_counts >= 0);
+   cts = malloc(sizeof(Counts));
+   if (cts == NULL) return NULL;
+   // malloc(0) may legally return NULL, so that alone is not a failure.
+   cts->counts = malloc(n_counts * sizeof(ULong));
+   if (cts->counts == NULL && n_counts > 0) {
+      free(cts);
+      return NULL;
+   }
+   cts->n_counts = n_counts;
+   if (n_counts > 0) memcpy(cts->counts, counts, n_counts * sizeof(ULong));
+   return cts;
+}
+
+// Make a Counts with n_counts entries, all zero; returns NULL (leaking
+// nothing) if out of memory.
+static Counts* new_Counts_Zeroed ( Int n_counts )
+{
+   Counts* cts;
+   assert(n_counts >= 0);
+
+   cts = malloc(sizeof(Counts));
+   if (cts == NULL) return NULL;
+   // calloc zero-fills; a NULL result for n_counts == 0 is legal, not OOM.
+   cts->counts = calloc(n_counts, sizeof(ULong));
+   if (cts->counts == NULL && n_counts > 0) {
+      free(cts);
+      return NULL;
+   }
+   cts->n_counts = n_counts;
+   return cts;
+}
+
+static void sdel_Counts ( Counts* cts )
+{  // Shallow delete: releases the struct only; cts->counts is not freed.
+   memset(cts, 0, sizeof *cts);
+   free(cts);
+}
+
+// Deep delete: frees the counts array as well as the struct.
+static void ddel_Counts ( Counts* cts )
+{
+   free(cts->counts);             // a NULL array is fine: free(NULL) is a no-op
+   memset(cts, 0, sizeof *cts);
+   free(cts);
+}
+
+static Counts* dopy_Counts ( Counts* cts )
+{  // Deep copy: new_Counts duplicates the array.  NULL if that fails.
+   return new_Counts( cts->n_counts, cts->counts );
+}
+
+// Bundle the given parts into a new CacheProfFile.  The pointers are stored
+// as-is (nothing is copied).  Returns NULL if malloc fails.
+static CacheProfFile* new_CacheProfFile ( char** desc_lines, char* cmd_line,
+                                          char* events_line, Int n_events,
+                                          char* summary_line,
+                                          WordFM* outerMap, Counts* summary )
+{
+   CacheProfFile* file = malloc(sizeof *file);
+   if (!file)
+      return NULL;
+
+   file->desc_lines   = desc_lines;
+   file->cmd_line     = cmd_line;
+   file->events_line  = events_line;
+   file->n_events     = n_events;
+   file->summary_line = summary_line;
+   file->outerMap     = outerMap;
+   file->summary      = summary;
+
+   return file;
+}
+
+static WordFM* dopy_InnerMap ( WordFM* innerMap )
+{
+   return dopyFM ( innerMap, NULL,   // NULL: keys are copied verbatim
+                             (Word(*)(Word))dopy_Counts );   // vals: deep copy
+}
+
+static void ddel_InnerMap ( WordFM* innerMap )
+{  // Keys get no finaliser (NULL); each Counts value goes to ddel_Counts.
+   deleteFM( innerMap, NULL, (void(*)(Word))ddel_Counts );
+}
+
+// Deep delete: frees the desc_lines vector, the header strings, the map
+// (keys and values) and the summary counts, then the struct itself.
+static void ddel_CacheProfFile ( CacheProfFile* cpf )
+{
+   if (cpf->desc_lines) {
+      char** d = cpf->desc_lines;
+      while (*d)
+         free(*d++);
+      free(cpf->desc_lines);
+   }
+   // free(NULL) does nothing, so absent strings need no test.
+   free(cpf->cmd_line);
+   free(cpf->events_line);
+   free(cpf->summary_line);
+   if (cpf->outerMap)
+      deleteFM( cpf->outerMap, (void(*)(Word))ddel_FileFn, 
+                               (void(*)(Word))ddel_InnerMap );
+   if (cpf->summary)
+      ddel_Counts(cpf->summary);
+
+   memset(cpf, 0, sizeof *cpf);
+   free(cpf);
+}
+
+// Print each count followed by a space.  counts[] holds ULong, hence %llu.
+static void showCounts ( FILE* f, Counts* c )
+{
+   Int i;
+   for (i = 0; i < c->n_counts; i++)
+      fprintf(f, "%llu ", c->counts[i]);
+}
+
+// Write cpf to f: desc/cmd/events lines, per-(file,fn) counts, then summary.
+static void show_CacheProfFile ( FILE* f, CacheProfFile* cpf )
+{
+   Int     i;
+   char**  d;
+   FileFn* topKey;
+   WordFM* topVal;
+   UWord   subKey;
+   Counts* subVal;
+
+   for (d = cpf->desc_lines; *d; d++)
+      fprintf(f, "%s\n", *d);
+   fprintf(f, "%s\n", cpf->cmd_line);
+   fprintf(f, "%s\n", cpf->events_line);
+
+   initIterFM( cpf->outerMap );
+   while (nextIterFM( cpf->outerMap, (Word*)(&topKey), (Word*)(&topVal) )) {
+      fprintf(f, "fl=%s\nfn=%s\n", topKey->fi_name, topKey->fn_name );
+      initIterFM( topVal );
+      while (nextIterFM( topVal, (Word*)(&subKey), (Word*)(&subVal) )) {
+         fprintf(f, "%lu   ", subKey );   // subKey is unsigned (UWord)
+         showCounts( f, subVal );
+         fprintf(f, "\n");
+      }
+      doneIterFM( topVal );
+   }
+   doneIterFM( cpf->outerMap );
+
+   // The summary is printed from the computed totals, not from summary_line.
+   fprintf(f, "summary:");
+   for (i = 0; i < cpf->summary->n_counts; i++)
+      fprintf(f, " %llu", cpf->summary->counts[i]);   // ULong => %llu
+   fprintf(f, "\n");
+}
+
+////////////////////////////////////////////////////////////////
+
+static Word cmp_FileFn ( Word s1, Word s2 )
+{
+   /* Order FileFn keys by file name, breaking ties on function name. */
+   FileFn* lhs = (FileFn*)s1;
+   FileFn* rhs = (FileFn*)s2;
+   Word byFile = strcmp(lhs->fi_name, rhs->fi_name);
+   if (byFile != 0) return byFile;
+   return strcmp(lhs->fn_name, rhs->fn_name);
+}
+
+static Word cmp_unboxed_UWord ( Word s1, Word s2 )
+{
+   /* Three-way compare of the two words viewed as unsigned values. */
+   UWord lhs = (UWord)s1;
+   UWord rhs = (UWord)s2;
+   if (lhs == rhs) return 0;
+   return (lhs < rhs) ? -1 : 1;
+}
+
+////////////////////////////////////////////////////////////////
+
+static Bool parse_ULong ( /*OUT*/ULong* res, /*INOUT*/char** pptr)
+{
+   /* Skip whitespace at *pptr, then parse decimal digits into *res (no
+      overflow check).  True: *pptr is just past the digits.  False:
+      *pptr is at the first non-space char and *res is untouched. */
+   ULong u64 = 0;
+   char* ptr = *pptr;
+   while (isspace((unsigned char)*ptr)) ptr++;
+   if (!isdigit((unsigned char)*ptr)) {
+      *pptr = ptr;  /* report how far we got, so callers can see why */
+      return False; /* end of string, or junk */
+   }
+   for (; isdigit((unsigned char)*ptr); ptr++)
+      u64 = (u64 * 10) + (ULong)(*ptr - '0');
+   *res = u64;
+   *pptr = ptr;
+   return True;
+}
+
+// str is a line of whitespace-separated decimal numbers.  Parse it
+// and return the numbers in a newly allocated Counts struct.  If lnno
+// is non-NULL, the first number is a line number: it is stored in
+// *lnno and left out of the counts.  If lnno is NULL, every number
+// goes into the counts.  Malformed lines exit via parseError().
+static 
+Counts* splitUpCountsLine ( SOURCE* s, /*OUT*/UWord* lnno, char* str )
+{
+#define N_TMPC 50
+   ULong   nums[N_TMPC];
+   Int     n_nums = 0;
+   Int     n_min  = lnno ? 2 : 1;  // optional line number + >= 1 count
+
+   // Gather numbers until parse_ULong reports there are no more.
+   // Checking the limit right after the increment keeps the next
+   // write inside nums[].
+   while (parse_ULong( &nums[n_nums], &str )) {
+      n_nums++;
+      if (n_nums >= N_TMPC)
+         barf(s, "N_TMPC too low.  Increase and recompile.");
+   }
+
+   // anything left over that isn't a number is an error
+   if (*str != 0)
+      parseError(s, "garbage in counts line");
+   if (n_nums < n_min)
+      parseError(s, "too few counts in count line");
+
+   if (lnno == NULL)
+      return new_Counts( n_nums, /*COPIED*/&nums[0] );
+
+   // peel off the leading line number; the rest are the counts
+   *lnno = (UWord)nums[0];
+   return new_Counts( n_nums-1, /*COPIED*/&nums[1] );
+#undef N_TMPC
+}
+
+static void addCounts ( SOURCE* s, /*OUT*/Counts* counts1, Counts* counts2 )
+{  /* Element-wise counts1 += counts2; the two lengths must agree. */
+   Int k, n = counts1->n_counts;
+   if (n != counts2->n_counts)
+      parseError(s, "addCounts: inconsistent number of counts");
+   for (k = 0; k < n; k++)
+      counts1->counts[k] += counts2->counts[k];
+}
+
+static Bool addCountsToMap ( SOURCE* s,
+                             WordFM* counts_map, 
+                             UWord lnno, Counts* newCounts )
+{
+   // Fold newCounts into counts_map under key lnno.  Returns True if
+   // an entry already existed and newCounts was added into it (the map
+   // does not keep newCounts); returns False if newCounts itself was
+   // inserted as a new binding (the map now refers to it).
+   Counts* existing;
+   Bool    present = lookupFM( counts_map, (Word*)(&existing), (Word)lnno );
+   if (!present) {
+      addToFM( counts_map, (Word)lnno, (Word)newCounts );
+      return False;
+   }
+   addCounts( s, existing, newCounts );
+   return True;
+}
+
+static
+void handle_counts ( SOURCE* s,
+                     CacheProfFile* cpf, 
+                     char* fi, char* fn, char* newCountsStr )
+{
+   /* Parse one "<lineno> <count>..." line and fold it into cpf: into
+      the inner map for (fi,fn), created on first sight, and into the
+      running summary.  Failures exit via parseError()/mallocFail(). */
+   WordFM* countsMap;
+   Bool    freeNewCounts;
+   UWord   lnno;
+   Counts* newCounts;
+   FileFn* topKey; 
+
+   if (0)  printf("%s %s %s\n", fi, fn, newCountsStr );
+
+   // parse the numbers
+   newCounts = splitUpCountsLine( s, &lnno, newCountsStr );
+   if (newCounts == NULL)
+      mallocFail(s, "handle_counts(1)");
+
+   // Did we get the right number?
+   if (newCounts->n_counts != cpf->n_events)
+      parseError(s, "# counts doesn't match # events");
+
+   // allocate the key
+   topKey = malloc(sizeof(FileFn));
+   if (topKey) {
+      topKey->fi_name = strdup(fi);
+      topKey->fn_name = strdup(fn);
+   }
+   if (! (topKey && topKey->fi_name && topKey->fn_name))
+      mallocFail(s, "handle_counts:");
+
+   // search for it
+   if (lookupFM( cpf->outerMap, (Word*)(&countsMap), (Word)topKey )) {
+      // found it.  Merge in new counts; our lookup key is now surplus
+      freeNewCounts = addCountsToMap( s, countsMap, lnno, newCounts );
+      ddel_FileFn(topKey);
+   } else {
+      // not found in the top map.  Create new entry
+      countsMap = newFM( malloc, free, cmp_unboxed_UWord );
+      if (!countsMap)
+         mallocFail(s, "handle_counts(2)");
+      addToFM( cpf->outerMap, (Word)topKey, (Word)countsMap );
+      freeNewCounts = addCountsToMap( s, countsMap, lnno, newCounts );
+   }
+
+   // also add to running summary total
+   addCounts( s, cpf->summary, newCounts );
+
+   // if safe to do so, free up the count vector
+   if (freeNewCounts)
+      ddel_Counts(newCounts);
+}
+
+
+/* Parse a complete file from the stream in 's'.  If a parse error
+   happens, do not return; instead exit via parseError().  If an
+   out-of-memory condition happens, do not return; instead exit via
+   mallocFail().  On success, returns a newly allocated CacheProfFile.
+*/
+static CacheProfFile* parse_CacheProfFile ( SOURCE* s )
+{
+#define M_TMP_DESCLINES 10
+
+   Int            i;
+   Bool           b;
+   char*          tmp_desclines[M_TMP_DESCLINES];
+   char*          p;
+   int            n_tmp_desclines = 0;
+   CacheProfFile* cpf;
+   Counts*        summaryRead; 
+   char*          curr_fn_init = "???";
+   char*          curr_fl_init = "???";
+   char*          curr_fn      = curr_fn_init;
+   char*          curr_fl      = curr_fl_init;
+
+   cpf = new_CacheProfFile( NULL, NULL, NULL, 0, NULL, NULL, NULL );
+   if (cpf == NULL)
+      mallocFail(s, "parse_CacheProfFile(1)");
+
+   // Parse "desc:" lines
+   while (1) {
+      b = readline(s);
+      if (!b || !streqn(line, "desc: ", 6))
+         break;
+      if (n_tmp_desclines >= M_TMP_DESCLINES)
+         barf(s, "M_TMP_DESCLINES too low; increase and recompile");
+      tmp_desclines[n_tmp_desclines] = strdup(line);
+      if (tmp_desclines[n_tmp_desclines++] == NULL)
+         mallocFail(s, "parse_CacheProfFile(1a)");
+   }
+
+   if (n_tmp_desclines == 0)
+      parseError(s, "parse_CacheProfFile: no DESC lines present");
+
+   cpf->desc_lines = malloc( (1+n_tmp_desclines) * sizeof(char*) );
+   if (cpf->desc_lines == NULL)
+      mallocFail(s, "parse_CacheProfFile(2)");
+
+   cpf->desc_lines[n_tmp_desclines] = NULL;
+   for (i = 0; i < n_tmp_desclines; i++)
+      cpf->desc_lines[i] = tmp_desclines[i];
+
+   // Parse "cmd:" line
+   if (!streqn(line, "cmd: ", 5))
+      parseError(s, "parse_CacheProfFile: no CMD line present");
+
+   cpf->cmd_line = strdup(line);
+   if (cpf->cmd_line == NULL)
+      mallocFail(s, "parse_CacheProfFile(3)");
+
+   // Parse "events:" line and figure out how many events there are
+   b = readline(s);
+   if (!b)
+      parseError(s, "parse_CacheProfFile: eof before EVENTS line");
+   if (!streqn(line, "events: ", 8))
+      parseError(s, "parse_CacheProfFile: no EVENTS line present");
+
+   // figure out how many events there are by counting the number
+   // of space-alphanum transitions in the events_line
+   cpf->events_line = strdup(line);
+   if (cpf->events_line == NULL)
+      mallocFail(s, "parse_CacheProfFile(3)");
+
+   cpf->n_events = 0;
+   assert(cpf->events_line[6] == ':');
+   for (p = &cpf->events_line[6]; *p; p++) {
+      if (p[0] == ' ' && isalpha((unsigned char)p[1]))
+         cpf->n_events++;
+   }
+
+   // create the running cross-check summary
+   cpf->summary = new_Counts_Zeroed( cpf->n_events );
+   if (cpf->summary == NULL)
+      mallocFail(s, "parse_CacheProfFile(4)");
+
+   // create the outer map (file+fn name --> inner map)
+   cpf->outerMap = newFM ( malloc, free, cmp_FileFn );
+   if (cpf->outerMap == NULL)
+      mallocFail(s, "parse_CacheProfFile(5)");
+
+   // process count lines
+   // (every branch but "summary: " falls through to the next readline)
+   while (1) {
+      b = readline(s);
+      if (!b)
+         parseError(s, "parse_CacheProfFile: eof before SUMMARY line");
+
+      if (isdigit((unsigned char)line[0])) {
+         handle_counts(s, cpf, curr_fl, curr_fn, line);
+      }
+      else if (streqn(line, "fn=", 3)) {
+         if (curr_fn != curr_fn_init)
+            free(curr_fn);
+         curr_fn = strdup(line+3);
+         if (curr_fn == NULL)
+            mallocFail(s, "parse_CacheProfFile(5a)");
+      }
+      else if (streqn(line, "fl=", 3)) {
+         if (curr_fl != curr_fl_init)
+            free(curr_fl);
+         curr_fl = strdup(line+3);
+         if (curr_fl == NULL)
+            mallocFail(s, "parse_CacheProfFile(5b)");
+      }
+      else if (streqn(line, "summary: ", 9)) {
+         break;
+      }
+      else {
+         parseError(s, "parse_CacheProfFile: unexpected line in main data");
+      }
+   }
+
+   // finally, the "summary:" line
+   if (!streqn(line, "summary: ", 9))
+      parseError(s, "parse_CacheProfFile: missing SUMMARY line");
+
+   cpf->summary_line = strdup(line);
+   if (cpf->summary_line == NULL)
+      mallocFail(s, "parse_CacheProfFile(6)");
+
+   // there should be nothing more
+   b = readline(s);
+   if (b)
+      parseError(s, "parse_CacheProfFile: "
+                    "extraneous content after SUMMARY line");
+
+   // check the summary counts are as expected
+   summaryRead = splitUpCountsLine( s, NULL, &cpf->summary_line[8] );
+   if (summaryRead == NULL)
+      mallocFail(s, "parse_CacheProfFile(7)");
+   if (summaryRead->n_counts != cpf->n_events)
+      parseError(s, "parse_CacheProfFile: wrong # counts in SUMMARY line");
+   for (i = 0; i < summaryRead->n_counts; i++) {
+      if (summaryRead->counts[i] != cpf->summary->counts[i]) {
+         parseError(s, "parse_CacheProfFile: "
+                       "computed vs stated SUMMARY counts mismatch");
+      }
+   }
+   free(summaryRead->counts);
+   sdel_Counts(summaryRead);
+
+   // since the summary counts are OK, free up the summary_line text
+   // which contains the same info.
+   if (cpf->summary_line) {
+      free(cpf->summary_line);
+      cpf->summary_line = NULL;
+   }
+
+   if (curr_fn != curr_fn_init)
+      free(curr_fn);
+   if (curr_fl != curr_fl_init)
+      free(curr_fl);
+
+   // All looks OK
+   return cpf;
+
+#undef M_TMP_DESCLINES
+}
+
+
+static void merge_CacheProfInfo ( SOURCE* s,
+                                  /*MOD*/CacheProfFile* dst,
+                                  CacheProfFile* src )
+{
+   /* Merge the contents of src into dst:
+      For each (filefn, innerMap) in src
+         if filefn not in dst
+            add binding dopy(filefn)->dopy(innerMap) to dst
+         else
+            for each (lineno, counts) in src->innerMap
+               if lineno not in dst->innerMap
+                  add binding lineno->dopy(counts) to dst->innerMap
+               else
+                  add counts into dst->innerMap[lineno]
+      Finally src's summary counts are added into dst's. */
+   /* Outer iterator:  FileFn* -> WordFM* (inner iterator)
+      Inner iterator:  UWord   -> Counts*
+   */
+   FileFn* soKey;
+   WordFM* soVal;
+   WordFM* doVal;
+   UWord   siKey;
+   Counts* siVal;
+   Counts* diVal;
+
+   /* First check mundane things: that the events: lines are
+      identical. */
+   if (!streq( dst->events_line, src->events_line ))
+     barf(s, "\"events:\" line of most recent file does "
+             "not match those previously processed");
+
+   initIterFM( src->outerMap );
+
+   // for (filefn, innerMap) in src
+   while (nextIterFM( src->outerMap, (Word*)&soKey, (Word*)&soVal )) {
+
+      // is filefn in dst?   
+      if (! lookupFM( dst->outerMap, (Word*)&doVal, (Word)soKey )) {
+
+         // no .. add dopy(filefn) -> dopy(innerMap) to dst
+         FileFn* c_soKey = dopy_FileFn(soKey);
+         WordFM* c_soVal = dopy_InnerMap(soVal);
+         if ((!c_soKey) || (!c_soVal)) goto oom;
+         addToFM( dst->outerMap, (Word)c_soKey, (Word)c_soVal );
+
+      } else {
+
+         // yes .. merge the two innermaps
+         initIterFM( soVal );
+
+         // for (lno, counts) in soVal (source inner map)
+         while (nextIterFM( soVal, (Word*)&siKey, (Word*)&siVal )) {
+
+            // is lno in the corresponding dst inner map?
+            if (! lookupFM( doVal, (Word*)&diVal, siKey )) {
+
+               // no .. add lineno->dopy(counts) to dst inner map
+               Counts* c_siVal = dopy_Counts( siVal );
+               if (!c_siVal) goto oom;
+               addToFM( doVal, siKey, (Word)c_siVal );
+
+            } else {
+
+               // yes .. merge counts into dst inner map val
+               addCounts( s, diVal, siVal );
+
+            }
+         }
+
+      }
+
+   }
+
+   // add the summaries too
+   addCounts(s, dst->summary, src->summary );
+
+   return;
+
+  oom:
+   mallocFail(s, "merge_CacheProfInfo");
+}
+
+static void usage ( void )
+{  /* Print a two-line usage message to stderr and exit with status 1. */
+   fprintf(stderr, 
+           "%s: Merges multiple cachegrind output files into one\n"
+           "%s: usage: %s [-o outfile] [files-to-merge]\n",
+           argv0, argv0, argv0);
+   exit(1);
+}
+
+int main ( int argc, char** argv )
+{
+   /* Parse every input file named on the command line, merge them into
+      the first one parsed, and write the result to -o's file or stdout. */
+   Int            i;
+   SOURCE         src;
+   CacheProfFile  *cpf, *cpfTmp;
+   FILE*          outfile = NULL;
+   char*          outfilename = NULL;
+   Int            outfileix = 0;
+
+   if (argv[0])
+      argv0 = argv[0];
+
+   if (argc < 2)
+      usage();
+
+   for (i = 1; i < argc; i++) {
+      if (streq(argv[i], "-h") || streq(argv[i], "--help"))
+         usage();
+   }
+
+   /* Scan args, looking for '-o outfilename'. */
+   for (i = 1; i < argc; i++) {
+      if (streq(argv[i], "-o")) {
+         if (i+1 < argc) {
+            outfilename = argv[i+1];
+            outfileix   = i;
+            break;
+         } else {
+            usage();
+         }
+      }
+   }
+
+   cpf = NULL;
+
+   for (i = 1; i < argc; i++) {
+      if (i == outfileix) {
+         /* Skip '-o' and whatever follows it */
+         i += 1;
+         continue;
+      }
+
+      fprintf(stderr, "%s: parsing %s\n", argv0, argv[i]);
+      src.lno      = 1;
+      src.filename = argv[i];
+      src.fp       = fopen(src.filename, "r");
+      if (!src.fp) {
+         perror(argv0);
+         barf(&src, "Cannot open input file");
+      }
+      assert(src.fp);
+      cpfTmp = parse_CacheProfFile( &src );
+      fclose(src.fp);
+
+      /* If this isn't the first file, merge */
+      if (cpf == NULL) {
+         /* this is the first file */
+         cpf = cpfTmp;
+      } else {
+         /* not the first file; merge */
+         fprintf(stderr, "%s: merging %s\n", argv0, argv[i]);
+         merge_CacheProfInfo( &src, cpf, cpfTmp );
+         ddel_CacheProfFile( cpfTmp );
+      }
+
+   }
+
+   /* Now create the output file. */
+
+   if (cpf) {
+
+      fprintf(stderr, "%s: writing %s\n", 
+                       argv0, outfilename ? outfilename : "(stdout)" );
+
+      /* Write the output. */
+      if (outfilename) {
+         outfile = fopen(outfilename, "w");
+         if (!outfile) {
+            fprintf(stderr, "%s: can't create output file %s\n", 
+                            argv0, outfilename);
+            perror(argv0);
+            exit(1);
+         }
+      } else {
+         outfile = stdout;
+      }
+
+      show_CacheProfFile( outfile, cpf );
+      fflush(outfile);  /* flush before testing, so late errors count */
+      if (ferror(outfile)) {
+         fprintf(stderr, "%s: error writing output file %s\n", 
+                         argv0, outfilename ? outfilename : "(stdout)");
+         perror(argv0);
+         if (outfile != stdout)
+            fclose(outfile);
+         exit(1);
+      }
+
+      if (outfile != stdout)
+         fclose( outfile );
+
+      ddel_CacheProfFile( cpf );
+   }
+
+   return 0;
+}
+
+
+//------------------------------------------------------------------//
+//---                           WordFM                           ---//
+//---                       Implementation                       ---//
+//------------------------------------------------------------------//
+
+/* ------------ Implementation ------------ */
+
+/* One element of the AVL tree */
+typedef
+   struct _AvlNode {
+      Word key;                /* compared via the map's kCmp */
+      Word val;
+      struct _AvlNode* left;   /* searched when kCmp(key, k) > 0 */
+      struct _AvlNode* right;  /* searched when kCmp(key, k) < 0 */
+      Char balance;            /* -1, 0 or 1; decremented when the left side grows */
+   }
+   AvlNode;
+
+typedef  /* A Word that may be absent */
+   struct {
+      Word w;   /* payload; meaningful only when b is True */
+      Bool b;   /* True if w has been filled in */
+   }
+   MaybeWord;
+
+#define WFM_STKMAX    32    // At most 2**32 entries can be iterated over
+// A finite map from Word keys to Word vals, held as an AVL tree.
+struct _WordFM {
+   AvlNode* root;                  // NULL when the map is empty
+   void*    (*alloc_nofail)( SizeT );   // allocates nodes and map copies
+   void     (*dealloc)(void*);          // releases nodes and the map itself
+   Word     (*kCmp)(Word,Word);         // key ordering: >0, 0 or <0
+   AvlNode* nodeStack[WFM_STKMAX]; // Iterator node stack
+   Int      numStack[WFM_STKMAX];  // Iterator num stack
+   Int      stackTop;              // Iterator stack pointer, one past end
+}; 
+
+/* forward: avl_remove_wrk and avl_removeroot_wrk call each other */
+static Bool avl_removeroot_wrk(AvlNode** t, Word(*kCmp)(Word,Word));
+
+/* Swing to the left.  Warning: no balance maintenance. */
+static void avl_swl ( AvlNode** root )
+{
+   AvlNode* oldTop = *root;
+   AvlNode* newTop = oldTop->right;   /* right child gets promoted */
+   *root         = newTop;
+   oldTop->right = newTop->left;      /* its left subtree changes parent */
+   newTop->left  = oldTop;
+}
+
+/* Swing to the right.  Warning: no balance maintenance. */
+static void avl_swr ( AvlNode** root )
+{
+   AvlNode* oldTop = *root;
+   AvlNode* newTop = oldTop->left;    /* left child gets promoted */
+   *root         = newTop;
+   oldTop->left  = newTop->right;     /* its right subtree changes parent */
+   newTop->right = oldTop;
+}
+
+/* Balance maintenance after especially nasty swings. */
+static void avl_nasty ( AvlNode* root )
+{
+   /* The children's new balance factors are chosen from root's
+      balance on entry; root itself always ends up at 0. */
+   Char bal = root->balance;
+   if (bal == -1) {
+      root->left->balance  = 0;
+      root->right->balance = 1;
+   } else if (bal == 1) {
+      root->left->balance  = -1;
+      root->right->balance = 0;
+   } else if (bal == 0) {
+      root->left->balance  = 0;
+      root->right->balance = 0;
+   } else {
+      assert(0);   /* any other balance factor is invalid */
+   }
+
+   root->balance = 0;
+}
+
+/* Find size (number of nodes) of a non-NULL tree, by recursion. */
+static Word size_avl_nonNull ( AvlNode* nd )
+{
+   return 1 + (nd->left  ? size_avl_nonNull(nd->left)  : 0)
+            + (nd->right ? size_avl_nonNull(nd->right) : 0);
+}
+
+/* Insert element a into the AVL tree *rootp.  Returns True if the depth
+   of the tree has grown.  If element with that key is already present,
+   just copy a->val to existing node, first returning old ->val field
+   of existing node in *oldV (with oldV->b set), so that the caller can
+   finalize it however it wants; a itself is then not linked in.
+*/
+static 
+Bool avl_insert_wrk ( AvlNode**         rootp, 
+                      /*OUT*/MaybeWord* oldV,
+                      AvlNode*          a, 
+                      Word              (*kCmp)(Word,Word) )
+{
+   Word cmpres;
+
+   /* initialize */
+   a->left    = 0;
+   a->right   = 0;
+   a->balance = 0;
+   oldV->b    = False;
+
+   /* insert into an empty tree? */
+   if (!(*rootp)) {
+      (*rootp) = a;
+      return True;
+   }
+ 
+   cmpres = kCmp( (*rootp)->key, a->key );
+
+   if (cmpres > 0) {
+      /* insert into the left subtree */
+      if ((*rootp)->left) {
+         AvlNode* left_subtree = (*rootp)->left;
+         if (avl_insert_wrk(&left_subtree, oldV, a, kCmp)) {
+            /* left side got deeper: act on the balance as it was before */
+            switch ((*rootp)->balance--) {
+               case  1: return False;
+               case  0: return True;
+               case -1: break;   /* was already left-heavy: rotate below */
+               default: assert(0);
+            }
+            if ((*rootp)->left->balance < 0) {
+               avl_swr( rootp );
+               (*rootp)->balance = 0;
+               (*rootp)->right->balance = 0;
+            } else {
+               avl_swl( &((*rootp)->left) );
+               avl_swr( rootp );
+               avl_nasty( *rootp );
+            }
+         } else {
+            /* depth unchanged, but the subtree root may have moved */
+            (*rootp)->left = left_subtree;
+         }
+         return False;
+      } else {
+         (*rootp)->left = a;
+         if ((*rootp)->balance--) 
+            return False;
+         return True;
+      }
+      assert(0);/*NOTREACHED*/
+   }
+   else 
+   if (cmpres < 0) {
+      /* insert into the right subtree (mirror image of the above) */
+      if ((*rootp)->right) {
+         AvlNode* right_subtree = (*rootp)->right;
+         if (avl_insert_wrk(&right_subtree, oldV, a, kCmp)) {
+            switch((*rootp)->balance++){
+               case -1: return False;
+               case  0: return True;
+               case  1: break;
+               default: assert(0);
+            }
+            if ((*rootp)->right->balance > 0) {
+               avl_swl( rootp );
+               (*rootp)->balance = 0;
+               (*rootp)->left->balance = 0;
+            } else {
+               avl_swr( &((*rootp)->right) );
+               avl_swl( rootp );
+               avl_nasty( *rootp );
+            }
+         } else {
+            (*rootp)->right = right_subtree;
+         }
+         return False;
+      } else {
+         (*rootp)->right = a;
+         if ((*rootp)->balance++) 
+            return False;
+         return True;
+      }
+      assert(0);/*NOTREACHED*/
+   }
+   else {
+      /* cmpres == 0, a duplicate - replace the val, but don't
+         incorporate the node in the tree */
+      oldV->b = True;
+      oldV->w = (*rootp)->val;
+      (*rootp)->val = a->val;
+      return False;
+   }
+}
+
+/* Remove an element a from the AVL tree *rootp.  a must be part of
+   the tree.  Returns True if the depth of the tree has shrunk. 
+*/
+static
+Bool avl_remove_wrk ( AvlNode** rootp, 
+                      AvlNode*  a, 
+                      Word(*kCmp)(Word,Word) )
+{
+   Bool ch;
+   Word cmpres = kCmp( (*rootp)->key, a->key );
+
+   if (cmpres > 0){
+      /* remove from the left subtree */
+      AvlNode* left_subtree = (*rootp)->left;
+      assert(left_subtree);
+      ch = avl_remove_wrk(&left_subtree, a, kCmp);
+      (*rootp)->left=left_subtree;
+      if (ch) {
+         /* left side got shallower: act on the balance as it was before */
+         switch ((*rootp)->balance++) {
+            case -1: return True;
+            case  0: return False;
+            case  1: break;   /* was already right-heavy: rotate below */
+            default: assert(0);
+         }
+         switch ((*rootp)->right->balance) {
+            case 0:
+               avl_swl( rootp );
+               (*rootp)->balance = -1;
+               (*rootp)->left->balance = 1;
+               return False;
+            case 1: 
+               avl_swl( rootp );
+               (*rootp)->balance = 0;
+               (*rootp)->left->balance = 0;
+               return True;   /* Bool result, as in the mirror-image case */
+            case -1:
+               break;
+            default:
+               assert(0);
+         }
+         avl_swr( &((*rootp)->right) );
+         avl_swl( rootp );
+         avl_nasty( *rootp );
+         return True;
+      }
+   }
+   else
+   if (cmpres < 0) {
+      /* remove from the right subtree */
+      AvlNode* right_subtree = (*rootp)->right;
+      assert(right_subtree);
+      ch = avl_remove_wrk(&right_subtree, a, kCmp);
+      (*rootp)->right = right_subtree;
+      if (ch) {
+         switch ((*rootp)->balance--) {
+            case  1: return True;
+            case  0: return False;
+            case -1: break;
+            default: assert(0);
+         }
+         switch ((*rootp)->left->balance) {
+            case 0:
+               avl_swr( rootp );
+               (*rootp)->balance = 1;
+               (*rootp)->right->balance = -1;
+               return False;
+            case -1:
+               avl_swr( rootp );
+               (*rootp)->balance = 0;
+               (*rootp)->right->balance = 0;
+               return True;
+            case 1:
+               break;
+            default:
+               assert(0);
+         }
+         avl_swl( &((*rootp)->left) );
+         avl_swr( rootp );
+         avl_nasty( *rootp );
+         return True;
+      }
+   }
+   else {
+      assert(cmpres == 0);
+      assert((*rootp)==a);
+      return avl_removeroot_wrk(rootp, kCmp);
+   }
+   return False;   /* the affected subtree kept its depth */
+}
+
+/* Remove the root of the AVL tree *rootp.  The node is only unlinked,
+ * not freed.  Warning: dumps core if *rootp is empty
+ */
+static 
+Bool avl_removeroot_wrk ( AvlNode** rootp, 
+                          Word(*kCmp)(Word,Word) )
+{
+   Bool     ch;
+   AvlNode* a;
+   if (!(*rootp)->left) {
+      if (!(*rootp)->right) {
+         (*rootp) = 0;   /* leaf: the tree becomes empty */
+         return True;
+      }
+      (*rootp) = (*rootp)->right;   /* only a right child: promote it */
+      return True;
+   }
+   if (!(*rootp)->right) {
+      (*rootp) = (*rootp)->left;    /* only a left child: promote it */
+      return True;
+   }
+   if ((*rootp)->balance < 0) {
+      /* remove from the left subtree */
+      a = (*rootp)->left;
+      while (a->right) a = a->right;   /* rightmost node of the left side */
+   } else {
+      /* remove from the right subtree */
+      a = (*rootp)->right;
+      while (a->left) a = a->left;     /* leftmost node of the right side */
+   }
+   ch = avl_remove_wrk(rootp, a, kCmp);
+   a->left    = (*rootp)->left;     /* a takes over the old root's place */
+   a->right   = (*rootp)->right;
+   a->balance = (*rootp)->balance;
+   (*rootp)   = a;
+   if(a->balance == 0) return ch;
+   return False;
+}
+
+static 
+AvlNode* avl_find_node ( AvlNode* t, Word k, Word(*kCmp)(Word,Word) )
+{
+   /* Iterative search down from t; NULL if no node has key k. */
+   while (t != NULL) {
+      Word cmpres = kCmp(t->key, k);
+      if (cmpres == 0) return t;
+      /* kCmp(node key, k) > 0 sends the search left, < 0 right */
+      t = (cmpres > 0) ? t->left : t->right;
+   }
+   return NULL;
+}
+
+// Clear the iterator stack.
+static void stackClear(WordFM* fm)
+{
+   Int slot = WFM_STKMAX;
+   assert(fm);
+   while (slot-- > 0) {
+      fm->nodeStack[slot] = NULL;
+      fm->numStack[slot]  = 0;
+   }
+   fm->stackTop = 0;
+}
+
+// Push (n, i) onto the iterator stack.  i must be 1, 2 or 3.
+static inline void stackPush(WordFM* fm, AvlNode* n, Int i)
+{
+   assert(fm->stackTop < WFM_STKMAX);   // room for one more entry?
+   assert(1 <= i && i <= 3);
+   fm->nodeStack[fm->stackTop] = n;
+   fm-> numStack[fm->stackTop] = i;
+   fm->stackTop++;
+}
+
+// Pop from the iterator stack.  Returns False, leaving *n and *i
+// alone, when the stack is empty.
+static inline Bool stackPop(WordFM* fm, AvlNode** n, Int* i)
+{
+   Int top;
+   assert(fm->stackTop <= WFM_STKMAX);
+
+   if (fm->stackTop <= 0)
+      return False;
+   top = --fm->stackTop;
+   *n = fm->nodeStack[top];
+   *i = fm-> numStack[top];
+   assert(1 <= *i && *i <= 3);
+   fm->nodeStack[top] = NULL;
+   fm-> numStack[top] = 0;
+   return True;
+}
+
+static  /* Recursively deep-copy the subtree rooted at nd; NULL on failure */
+AvlNode* avl_dopy ( AvlNode* nd, 
+                    Word(*dopyK)(Word), 
+                    Word(*dopyV)(Word),
+                    void*(alloc_nofail)(SizeT) )
+{
+   AvlNode* nyu;
+   if (! nd)
+      return NULL;
+   nyu = alloc_nofail(sizeof(AvlNode));
+   assert(nyu);
+   /* nyu starts out pointing at nd's children; replaced by copies below */
+   nyu->left = nd->left;
+   nyu->right = nd->right;
+   nyu->balance = nd->balance;
+
+   /* Copy key */
+   if (dopyK) {
+      nyu->key = dopyK( nd->key );
+      if (nd->key != 0 && nyu->key == 0)
+         return NULL; /* oom in key dcopy.  NOTE(review): nyu is not released */
+   } else {
+      /* copying assumedly unboxed keys */
+      nyu->key = nd->key;
+   }
+
+   /* Copy val */
+   if (dopyV) {
+      nyu->val = dopyV( nd->val );
+      if (nd->val != 0 && nyu->val == 0)
+         return NULL; /* oom in val dcopy.  NOTE(review): nyu is not released */
+   } else {
+      /* copying assumedly unboxed vals */
+      nyu->val = nd->val;
+   }
+
+   /* Copy subtrees */
+   if (nyu->left) {
+      nyu->left = avl_dopy( nyu->left, dopyK, dopyV, alloc_nofail );
+      if (! nyu->left)
+         return NULL;
+   }
+   if (nyu->right) {
+      nyu->right = avl_dopy( nyu->right, dopyK, dopyV, alloc_nofail );
+      if (! nyu->right)
+         return NULL;
+   }
+
+   return nyu;
+}
+
+/* --- Public interface functions --- */
+
+/* Initialise a WordFM: empty tree, empty iterator stack, given callbacks. */
+void initFM ( WordFM* fm,
+              void*   (*alloc_nofail)( SizeT ),
+              void    (*dealloc)(void*),
+              Word    (*kCmp)(Word,Word) )
+{
+   fm->alloc_nofail = alloc_nofail;
+   fm->dealloc      = dealloc;
+   fm->kCmp         = kCmp;
+   fm->root         = NULL;
+   fm->stackTop     = 0;
+}
+
+/* Allocate (via alloc_nofail, asserted non-NULL) and initialise a WordFM. */
+WordFM* newFM( void* (*alloc_nofail)( SizeT ),
+               void  (*dealloc)(void*),
+               Word  (*kCmp)(Word,Word) )
+{
+   WordFM* fm = alloc_nofail(sizeof(WordFM));
+   assert(fm);
+   initFM(fm, alloc_nofail, dealloc, kCmp);
+   return fm;
+}
+
+static void avl_free ( AvlNode* nd, 
+                       void(*kFin)(Word),
+                       void(*vFin)(Word),
+                       void(*dealloc)(void*) )
+{
+   /* Post-order teardown: both subtrees first, then this node's key
+      and val finalizers (when supplied), then the node itself. */
+   if (nd == NULL)
+      return;
+
+   avl_free(nd->left,  kFin, vFin, dealloc);   /* returns at once on NULL */
+   avl_free(nd->right, kFin, vFin, dealloc);
+
+   if (kFin != NULL) kFin( nd->key );
+   if (vFin != NULL) vFin( nd->val );
+   memset(nd, 0, sizeof(AvlNode));
+   dealloc(nd);
+}
+
+/* Free up the FM.  If kFin is non-NULL, it is applied to keys
+   before the FM is deleted; ditto with vFin for vals. */
+void deleteFM ( WordFM* fm, void(*kFin)(Word), void(*vFin)(Word) )
+{
+   void(*release)(void*) = fm->dealloc;  /* saved: the memset wipes fm */
+   avl_free( fm->root, kFin, vFin, release );
+   memset(fm, 0, sizeof(WordFM) );
+   release(fm);
+}
+
+/* Add (k,v) to fm.  If k is already bound, only the val is replaced;
+   the old val is dropped without being finalized. */
+void addToFM ( WordFM* fm, Word k, Word v )
+{
+   MaybeWord oldV;
+   AvlNode*  node = fm->alloc_nofail( sizeof(struct _AvlNode) );
+   assert(node);
+   node->key = k;
+   node->val = v;
+   oldV.b = False;
+   oldV.w = 0;
+   avl_insert_wrk( &fm->root, &oldV, node, fm->kCmp );
+   /* duplicate key: node was not linked in; release via fm's dealloc */
+   if (oldV.b)
+      fm->dealloc(node);
+}
+
+// Delete key from fm, returning associated val if found
+Bool delFromFM ( WordFM* fm, /*OUT*/Word* oldV, Word key )
+{
+   AvlNode* victim = avl_find_node( fm->root, key, fm->kCmp );
+   if (victim == NULL)
+      return False;
+
+   // unlink the node first, then hand back its val and release it
+   avl_remove_wrk( &fm->root, victim, fm->kCmp );
+   if (oldV != NULL)
+      *oldV = victim->val;
+   fm->dealloc(victim);
+   return True;
+}
+
+// Look up in fm, assigning found val at spec'd address
+Bool lookupFM ( WordFM* fm, /*OUT*/Word* valP, Word key )
+{
+   AvlNode* hit = avl_find_node( fm->root, key, fm->kCmp );
+   if (hit == NULL)
+      return False;   // miss: *valP is left untouched
+
+   // hit: valP may be NULL when the caller only wants a yes/no answer
+   if (valP != NULL)
+      *valP = hit->val;
+   return True;
+}
+
+Word sizeFM ( WordFM* fm )
+{  // Counts nodes by walking the whole tree on every call.
+   if (fm->root == NULL) return 0;
+   return size_avl_nonNull( fm->root );
+}
+
+// set up FM for iteration: reset the stack, then seed it with the root
+void initIterFM ( WordFM* fm )
+{
+   assert(fm);
+   stackClear(fm);
+   if (fm->root)
+      stackPush(fm, fm->root, 1);
+}
+
+// get next key/val pair, in ascending key order; returns False once
+// the traversal is exhausted.  NOTE(review): nothing here detects
+// changes to fm made since initIterFM, so don't modify it meanwhile.
+Bool nextIterFM ( WordFM* fm, /*OUT*/Word* pKey, /*OUT*/Word* pVal )
+{
+   Int i = 0;
+   AvlNode* n = NULL;
+   assert(fm);
+
+   // This in-order traversal requires each node to be pushed and popped
+   // three times.  These could be avoided by updating nodes in-situ on the
+   // top of the stack, but the push/pop cost is so small that it's worth
+   // keeping this loop in this simpler form.
+   while (stackPop(fm, &n, &i)) {
+      switch (i) {
+      case 1:   // first visit: descend into the left subtree first
+         stackPush(fm, n, 2);
+         if (n->left)  stackPush(fm, n->left, 1);
+         break;
+      case 2:   // left subtree done: yield this node
+         stackPush(fm, n, 3);
+         if (pKey) *pKey = n->key;
+         if (pVal) *pVal = n->val;
+         return True;
+      case 3:   // node yielded: continue with the right subtree
+         if (n->right) stackPush(fm, n->right, 1);
+         break;
+      default:
+         assert(0);
+      }
+   }
+
+   // Stack empty, iterator is exhausted, return False
+   return False;
+}
+
+// End an iteration.  Currently a no-op: there is no iterating flag to clear.
+void doneIterFM ( WordFM* fm )
+{
+}
+
+/* Deep-copy fm.  dopyK/dopyV, when non-NULL, duplicate each key/val;
+   when NULL the word is copied as-is.  Returns NULL if a copy fails. */
+WordFM* dopyFM ( WordFM* fm, Word(*dopyK)(Word), Word(*dopyV)(Word) )
+{
+   WordFM* nyu; 
+   /* can't clone the fm whilst iterating on it */
+   assert(fm->stackTop == 0);
+   nyu = fm->alloc_nofail( sizeof(WordFM) );
+   assert(nyu);
+   *nyu = *fm;
+
+   /* give the copy a clean iterator of its own: it is nyu's state,
+      not the source map's, that needs resetting */
+   nyu->stackTop = 0;
+   memset(nyu->nodeStack, 0, sizeof(nyu->nodeStack));
+   memset(nyu->numStack, 0,  sizeof(nyu->numStack));
+
+   if (nyu->root) {
+      nyu->root = avl_dopy( nyu->root, dopyK, dopyV, fm->alloc_nofail );
+      if (! nyu->root)
+         return NULL;
+   }
+   return nyu;
+}
+
+//------------------------------------------------------------------//
+//---                         end WordFM                         ---//
+//---                       Implementation                       ---//
+//------------------------------------------------------------------//
+
+/*--------------------------------------------------------------------*/
+/*--- end                                               cg_merge.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/.svn/text-base/cg_sim.c.svn-base b/cachegrind/.svn/text-base/cg_sim.c.svn-base
new file mode 100644
index 0000000..57abdfc
--- /dev/null
+++ b/cachegrind/.svn/text-base/cg_sim.c.svn-base
@@ -0,0 +1,198 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Cache simulation                                    cg_sim.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Notes:
+  - simulates a write-allocate cache
+  - (block --> set) hash function uses simple bit selection
+  - handling of references straddling two cache blocks:
+      - counts as only one cache access (not two)
+      - both blocks hit                  --> one hit
+      - one block hits, the other misses --> one miss
+      - both blocks miss                 --> one miss (not two)
+*/
+
+typedef struct {                        /* state of one simulated cache */
+   Int          size;                   /* bytes */
+   Int          assoc;                  /* tags (ways) per set */
+   Int          line_size;              /* bytes */
+   Int          sets;                   /* (size / line_size) / assoc */
+   Int          sets_min_1;             /* sets - 1; ANDed in as set mask */
+   Int          line_size_bits;         /* log2(line_size) */
+   Int          tag_shift;              /* line_size_bits + log2(sets) */
+   Char         desc_line[128];         /* printable size/line/assoc text */
+   UWord*       tags;                   /* sets * assoc tags, MRU-first per set */
+} cache_t2;
+
+/* Set up *c from the already-validated size/assoc/line_size in config. */
+static void cachesim_initcache(cache_t config, cache_t2* c)
+{
+   Int n_tags, k, line_bits;
+
+   /* User-visible geometry, copied verbatim. */
+   c->size      = config.size;
+   c->assoc     = config.assoc;
+   c->line_size = config.line_size;
+   /* Derived values: set count, its mask, and the two shift amounts. */
+   line_bits         = VG_(log2)(c->line_size);
+   c->sets           = (c->size / c->line_size) / c->assoc;
+   c->sets_min_1     = c->sets - 1;
+   c->line_size_bits = line_bits;
+   c->tag_shift      = line_bits + VG_(log2)(c->sets);
+   /* Human-readable description; a 1-way cache is called direct-mapped. */
+   if (c->assoc != 1) {
+      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
+                                 c->size, c->line_size, c->assoc);
+   } else {
+      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
+                                 c->size, c->line_size);
+   }
+   /* One tag per (set, way) slot, all starting out as 0. */
+   n_tags  = c->sets * c->assoc;
+   c->tags = VG_(malloc)("cg.sim.ci.1", sizeof(UWord) * c->sets * c->assoc);
+   for (k = 0; k < n_tags; k++) c->tags[k] = 0;
+}
+
+/* CACHESIM(L, MISS_TREATMENT) defines a static cache_t2 named L together
+ * with cachesim_<L>_initcache() and cachesim_<L>_doref().  It is a macro
+ * because passing the cache_t2 as an arg costs 3-5% in extra indirection. */
+
+#define CACHESIM(L, MISS_TREATMENT)                                         \
+/* The cache and associated bits and pieces. */                             \
+static cache_t2 L;                                                          \
+/* Initialise L from config (see cachesim_initcache). */                    \
+static void cachesim_##L##_initcache(cache_t config)                        \
+{                                                                           \
+    cachesim_initcache(config, &L);                                         \
+}                                                                           \
+/* Simulate one access to [a, a+size-1]; a miss runs MISS_TREATMENT. */     \
+/* This attribute forces GCC to inline this function, even though it's */   \
+/* bigger than its usual limit.  Inlining gains around 5--10% speedup. */   \
+__attribute__((always_inline))                                              \
+static __inline__                                                           \
+void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2)         \
+{                                                                           \
+   UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);          \
+   UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);          \
+   UWord tag  = a >> L.tag_shift;                                           \
+   UWord tag2;                                                              \
+   Int i, j;                                                                \
+   Bool is_miss = False;                                                    \
+   UWord* set;                                                              \
+   /* set1/set2 index the sets of the first and last byte accessed. */      \
+   /* First case: word entirely within line. */                             \
+   if (set1 == set2) {                                                      \
+                                                                            \
+      set = &(L.tags[set1 * L.assoc]);                                      \
+                                                                            \
+      /* This loop is unrolled for just the first case, which is the most */\
+      /* common.  We can't unroll any further because it would screw up   */\
+      /* if we have a direct-mapped (1-way) cache.                        */\
+      if (tag == set[0]) {                                                  \
+         return;                                                            \
+      }                                                                     \
+      /* If the tag is one other than the MRU, move it into the MRU spot  */\
+      /* and shuffle the rest down.                                       */\
+      for (i = 1; i < L.assoc; i++) {                                       \
+         if (tag == set[i]) {                                               \
+            for (j = i; j > 0; j--) {                                       \
+               set[j] = set[j - 1];                                         \
+            }                                                               \
+            set[0] = tag;                                                   \
+            return;                                                         \
+         }                                                                  \
+      }                                                                     \
+                                                                            \
+      /* A miss;  install this tag as MRU, shuffle rest down. */            \
+      for (j = L.assoc - 1; j > 0; j--) {                                   \
+         set[j] = set[j - 1];                                               \
+      }                                                                     \
+      set[0] = tag;                                                         \
+      MISS_TREATMENT;                                                       \
+      return;                                                               \
+                                                                            \
+   /* Second case: word straddles two lines. */                             \
+   /* Nb: fast ((set1+1) % L.sets); relies on L.sets being a power of 2 */  \
+   } else if (((set1 + 1) & (L.sets-1)) == set2) {                          \
+      set = &(L.tags[set1 * L.assoc]);  /* block 1: set of first byte */    \
+      if (tag == set[0]) {                                                  \
+         goto block2;                                                       \
+      }                                                                     \
+      for (i = 1; i < L.assoc; i++) {                                       \
+         if (tag == set[i]) {                                               \
+            for (j = i; j > 0; j--) {                                       \
+               set[j] = set[j - 1];                                         \
+            }                                                               \
+            set[0] = tag;                                                   \
+            goto block2;                                                    \
+         }                                                                  \
+      }                                                                     \
+      for (j = L.assoc - 1; j > 0; j--) {                                   \
+         set[j] = set[j - 1];                                               \
+      }                                                                     \
+      set[0] = tag;                                                         \
+      is_miss = True;  /* block 1 missed; MISS_TREATMENT deferred */        \
+block2:  /* block 2: same MRU lookup, in set2 with tag2 */                  \
+      set = &(L.tags[set2 * L.assoc]);                                      \
+      tag2 = (a+size-1) >> L.tag_shift;                                     \
+      if (tag2 == set[0]) {                                                 \
+         goto miss_treatment;                                               \
+      }                                                                     \
+      for (i = 1; i < L.assoc; i++) {                                       \
+         if (tag2 == set[i]) {                                              \
+            for (j = i; j > 0; j--) {                                       \
+               set[j] = set[j - 1];                                         \
+            }                                                               \
+            set[0] = tag2;                                                  \
+            goto miss_treatment;                                            \
+         }                                                                  \
+      }                                                                     \
+      for (j = L.assoc - 1; j > 0; j--) {                                   \
+         set[j] = set[j - 1];                                               \
+      }                                                                     \
+      set[0] = tag2;                                                        \
+      is_miss = True;                                                       \
+miss_treatment:  /* one MISS_TREATMENT even if both blocks missed */        \
+      if (is_miss) { MISS_TREATMENT; }                                      \
+                                                                            \
+   } else {  /* set2 is neither set1 nor the set after it */                \
+       VG_(printf)("addr: %lx  size: %u  sets: %d %d", a, size, set1, set2);\
+       VG_(tool_panic)("item straddles more than two cache sets");          \
+   }                                                                        \
+   return;                                                                  \
+}
+
+CACHESIM(L2, (*m2)++ );  /* first: I1/D1 below call cachesim_L2_doref */
+CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );  /* miss: ++*m1, then L2 */
+CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );  /* miss: ++*m1, then L2 */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                 cg_sim.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/cachegrind/Makefile b/cachegrind/Makefile
new file mode 100644
index 0000000..b2cf087
--- /dev/null
+++ b/cachegrind/Makefile
@@ -0,0 +1,1229 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# cachegrind/Makefile.  Generated from Makefile.in by configure.
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+
+# This file contains things shared by coregrind/Makefile.am and tool
+# Makefile.am files.
+
+
+
+
+pkgdatadir = $(datadir)/valgrind
+pkglibdir = $(libdir)/valgrind
+pkgincludedir = $(includedir)/valgrind
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = x86_64-unknown-linux-gnu
+host_triplet = x86_64-unknown-linux-gnu
+DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \
+	$(srcdir)/Makefile.in $(srcdir)/cg_annotate.in \
+	$(top_srcdir)/Makefile.all.am \
+	$(top_srcdir)/Makefile.core-tool.am \
+	$(top_srcdir)/Makefile.flags.am $(top_srcdir)/Makefile.tool.am
+noinst_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
+	$(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6)
+#am__append_1 = cachegrind-x86-linux
+am__append_2 = cachegrind-amd64-linux
+#am__append_3 = cachegrind-ppc32-linux
+#am__append_4 = cachegrind-ppc64-linux
+#am__append_5 = cachegrind-ppc32-aix5
+#am__append_6 = cachegrind-ppc64-aix5
+bin_PROGRAMS = cg_merge$(EXEEXT)
+subdir = cachegrind
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES = cg_annotate
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"
+binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+#am__EXEEXT_1 = cachegrind-x86-linux$(EXEEXT)
+am__EXEEXT_2 = cachegrind-amd64-linux$(EXEEXT)
+#am__EXEEXT_3 = cachegrind-ppc32-linux$(EXEEXT)
+#am__EXEEXT_4 = cachegrind-ppc64-linux$(EXEEXT)
+#am__EXEEXT_5 = cachegrind-ppc32-aix5$(EXEEXT)
+#am__EXEEXT_6 = cachegrind-ppc64-aix5$(EXEEXT)
+PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
+am__objects_1 = cachegrind_amd64_linux-cg_main.$(OBJEXT)
+am__objects_2 = cachegrind_amd64_linux-cg-amd64.$(OBJEXT)
+am_cachegrind_amd64_linux_OBJECTS = $(am__objects_1) $(am__objects_2)
+cachegrind_amd64_linux_OBJECTS = $(am_cachegrind_amd64_linux_OBJECTS)
+am__DEPENDENCIES_1 =
+am__DEPENDENCIES_2 = $(COREGRIND_LIBS_AMD64_LINUX) \
+	$(am__DEPENDENCIES_1)
+cachegrind_amd64_linux_LINK = $(CCLD) $(cachegrind_amd64_linux_CFLAGS) \
+	$(CFLAGS) $(cachegrind_amd64_linux_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_3 = cachegrind_ppc32_aix5-cg_main.$(OBJEXT)
+am__objects_4 = cachegrind_ppc32_aix5-cg-ppc32.$(OBJEXT)
+am_cachegrind_ppc32_aix5_OBJECTS = $(am__objects_3) $(am__objects_4)
+cachegrind_ppc32_aix5_OBJECTS = $(am_cachegrind_ppc32_aix5_OBJECTS)
+am__DEPENDENCIES_3 = $(COREGRIND_LIBS_PPC32_AIX5) \
+	$(am__DEPENDENCIES_1)
+cachegrind_ppc32_aix5_LINK = $(CCLD) $(cachegrind_ppc32_aix5_CFLAGS) \
+	$(CFLAGS) $(cachegrind_ppc32_aix5_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_5 = cachegrind_ppc32_linux-cg_main.$(OBJEXT)
+am__objects_6 = cachegrind_ppc32_linux-cg-ppc32.$(OBJEXT)
+am_cachegrind_ppc32_linux_OBJECTS = $(am__objects_5) $(am__objects_6)
+cachegrind_ppc32_linux_OBJECTS = $(am_cachegrind_ppc32_linux_OBJECTS)
+am__DEPENDENCIES_4 = $(COREGRIND_LIBS_PPC32_LINUX) \
+	$(am__DEPENDENCIES_1)
+cachegrind_ppc32_linux_LINK = $(CCLD) $(cachegrind_ppc32_linux_CFLAGS) \
+	$(CFLAGS) $(cachegrind_ppc32_linux_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_7 = cachegrind_ppc64_aix5-cg_main.$(OBJEXT)
+am__objects_8 = cachegrind_ppc64_aix5-cg-ppc64.$(OBJEXT)
+am_cachegrind_ppc64_aix5_OBJECTS = $(am__objects_7) $(am__objects_8)
+cachegrind_ppc64_aix5_OBJECTS = $(am_cachegrind_ppc64_aix5_OBJECTS)
+am__DEPENDENCIES_5 = $(COREGRIND_LIBS_PPC64_AIX5) \
+	$(am__DEPENDENCIES_1)
+cachegrind_ppc64_aix5_LINK = $(CCLD) $(cachegrind_ppc64_aix5_CFLAGS) \
+	$(CFLAGS) $(cachegrind_ppc64_aix5_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_9 = cachegrind_ppc64_linux-cg_main.$(OBJEXT)
+am__objects_10 = cachegrind_ppc64_linux-cg-ppc64.$(OBJEXT)
+am_cachegrind_ppc64_linux_OBJECTS = $(am__objects_9) $(am__objects_10)
+cachegrind_ppc64_linux_OBJECTS = $(am_cachegrind_ppc64_linux_OBJECTS)
+am__DEPENDENCIES_6 = $(COREGRIND_LIBS_PPC64_LINUX) \
+	$(am__DEPENDENCIES_1)
+cachegrind_ppc64_linux_LINK = $(CCLD) $(cachegrind_ppc64_linux_CFLAGS) \
+	$(CFLAGS) $(cachegrind_ppc64_linux_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_11 = cachegrind_x86_linux-cg_main.$(OBJEXT)
+am__objects_12 = cachegrind_x86_linux-cg-x86.$(OBJEXT)
+am_cachegrind_x86_linux_OBJECTS = $(am__objects_11) $(am__objects_12)
+cachegrind_x86_linux_OBJECTS = $(am_cachegrind_x86_linux_OBJECTS)
+am__DEPENDENCIES_7 = $(COREGRIND_LIBS_X86_LINUX) $(am__DEPENDENCIES_1)
+cachegrind_x86_linux_LINK = $(CCLD) $(cachegrind_x86_linux_CFLAGS) \
+	$(CFLAGS) $(cachegrind_x86_linux_LDFLAGS) $(LDFLAGS) -o $@
+am_cg_merge_OBJECTS = cg_merge-cg_merge.$(OBJEXT)
+cg_merge_OBJECTS = $(am_cg_merge_OBJECTS)
+cg_merge_LDADD = $(LDADD)
+cg_merge_LINK = $(CCLD) $(cg_merge_CFLAGS) $(CFLAGS) \
+	$(cg_merge_LDFLAGS) $(LDFLAGS) -o $@
+binSCRIPT_INSTALL = $(INSTALL_SCRIPT)
+SCRIPTS = $(bin_SCRIPTS)
+DEFAULT_INCLUDES = -I. -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+SOURCES = $(cachegrind_amd64_linux_SOURCES) \
+	$(cachegrind_ppc32_aix5_SOURCES) \
+	$(cachegrind_ppc32_linux_SOURCES) \
+	$(cachegrind_ppc64_aix5_SOURCES) \
+	$(cachegrind_ppc64_linux_SOURCES) \
+	$(cachegrind_x86_linux_SOURCES) $(cg_merge_SOURCES)
+DIST_SOURCES = $(cachegrind_amd64_linux_SOURCES) \
+	$(cachegrind_ppc32_aix5_SOURCES) \
+	$(cachegrind_ppc32_linux_SOURCES) \
+	$(cachegrind_ppc64_aix5_SOURCES) \
+	$(cachegrind_ppc64_linux_SOURCES) \
+	$(cachegrind_x86_linux_SOURCES) $(cg_merge_SOURCES)
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+	html-recursive info-recursive install-data-recursive \
+	install-dvi-recursive install-exec-recursive \
+	install-html-recursive install-info-recursive \
+	install-pdf-recursive install-ps-recursive install-recursive \
+	installcheck-recursive installdirs-recursive pdf-recursive \
+	ps-recursive uninstall-recursive
+HEADERS = $(noinst_HEADERS)
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = ${SHELL} /home/steph/compile/valgrind/missing --run aclocal-1.10
+AMTAR = ${SHELL} /home/steph/compile/valgrind/missing --run tar
+AR = /usr/bin/ar
+AUTOCONF = ${SHELL} /home/steph/compile/valgrind/missing --run autoconf
+AUTOHEADER = ${SHELL} /home/steph/compile/valgrind/missing --run autoheader
+AUTOMAKE = ${SHELL} /home/steph/compile/valgrind/missing --run automake-1.10
+AWK = gawk
+BOOST_CFLAGS = 
+BOOST_LIBS = -lboost_thread-mt -m64
+CC = gcc
+CCAS = gcc
+CCASDEPMODE = depmode=gcc3
+CCASFLAGS = -Wno-long-long
+CCDEPMODE = depmode=gcc3
+CFLAGS = -Wno-long-long -Wno-pointer-sign -Wdeclaration-after-statement -fno-stack-protector
+CPP = gcc -E
+CPPFLAGS = 
+CXX = g++
+CXXDEPMODE = depmode=gcc3
+CXXFLAGS = -g -O2
+CYGPATH_W = echo
+DEFAULT_SUPP = exp-ptrcheck.supp xfree-3.supp xfree-4.supp glibc-2.X-drd.supp glibc-2.34567-NPTL-helgrind.supp glibc-2.X.supp 
+DEFS = -DHAVE_CONFIG_H
+DEPDIR = .deps
+DIFF = diff -u
+DISTCHECK_CONFIGURE_FLAGS = --with-vex=$(top_srcdir)/VEX
+ECHO_C = 
+ECHO_N = -n
+ECHO_T = 
+EGREP = /bin/grep -E
+EXEEXT = 
+FLAG_FNO_STACK_PROTECTOR = -fno-stack-protector
+FLAG_M32 = -m32
+FLAG_M64 = -m64
+FLAG_MAIX32 = 
+FLAG_MAIX64 = 
+FLAG_MMMX = -mmmx
+FLAG_MSSE = -msse
+FLAG_UNLIMITED_INLINE_UNIT_GROWTH = --param inline-unit-growth=900
+FLAG_WDECL_AFTER_STMT = -Wdeclaration-after-statement
+FLAG_W_EXTRA = -Wextra
+FLAG_W_NO_FORMAT_ZERO_LENGTH = -Wno-format-zero-length
+GDB = /usr/bin/gdb
+GLIBC_VERSION = 2.8
+GREP = /bin/grep
+INSTALL = /usr/bin/install -c
+INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_SCRIPT = ${INSTALL}
+INSTALL_STRIP_PROGRAM = $(install_sh) -c -s
+LDFLAGS = 
+LIBOBJS = 
+LIBS = 
+LN_S = ln -s
+LTLIBOBJS = 
+MAINT = #
+MAKEINFO = ${SHELL} /home/steph/compile/valgrind/missing --run makeinfo
+MKDIR_P = /bin/mkdir -p
+MPI_CC = mpicc
+OBJEXT = o
+PACKAGE = valgrind
+PACKAGE_BUGREPORT = valgrind-users@lists.sourceforge.net
+PACKAGE_NAME = Valgrind
+PACKAGE_STRING = Valgrind 3.5.0.SVN
+PACKAGE_TARNAME = valgrind
+PACKAGE_VERSION = 3.5.0.SVN
+PATH_SEPARATOR = :
+PERL = /usr/bin/perl
+PKG_CONFIG = /usr/bin/pkg-config
+PREFERRED_STACK_BOUNDARY = 
+QTCORE_CFLAGS = -DQT_SHARED -I/usr/include/QtCore  
+QTCORE_LIBS = -lQtCore  
+RANLIB = ranlib
+SET_MAKE = 
+SHELL = /bin/sh
+STRIP = 
+VALT_LOAD_ADDRESS = 0x38000000
+VERSION = 3.5.0.SVN
+VEX_DIR = $(top_srcdir)/VEX
+VGCONF_ARCH_PRI = amd64
+VGCONF_OS = linux
+VGCONF_PLATFORM_PRI_CAPS = AMD64_LINUX
+VGCONF_PLATFORM_SEC_CAPS = 
+abs_builddir = /home/steph/compile/valgrind/cachegrind
+abs_srcdir = /home/steph/compile/valgrind/cachegrind
+abs_top_builddir = /home/steph/compile/valgrind
+abs_top_srcdir = /home/steph/compile/valgrind
+ac_ct_CC = gcc
+ac_ct_CXX = g++
+am__include = include
+am__leading_dot = .
+am__quote = 
+am__tar = ${AMTAR} chof - "$$tardir"
+am__untar = ${AMTAR} xf -
+bindir = ${exec_prefix}/bin
+build = x86_64-unknown-linux-gnu
+build_alias = 
+build_cpu = x86_64
+build_os = linux-gnu
+build_vendor = unknown
+builddir = .
+datadir = ${datarootdir}
+datarootdir = ${prefix}/share
+docdir = ${datarootdir}/doc/${PACKAGE_TARNAME}
+dvidir = ${docdir}
+exec_prefix = ${prefix}
+host = x86_64-unknown-linux-gnu
+host_alias = 
+host_cpu = x86_64
+host_os = linux-gnu
+host_vendor = unknown
+htmldir = ${docdir}
+includedir = ${prefix}/include
+infodir = ${datarootdir}/info
+install_sh = $(SHELL) /home/steph/compile/valgrind/install-sh
+libdir = ${exec_prefix}/lib
+libexecdir = ${exec_prefix}/libexec
+localedir = ${datarootdir}/locale
+localstatedir = ${prefix}/var
+mandir = ${datarootdir}/man
+mkdir_p = /bin/mkdir -p
+oldincludedir = /usr/include
+pdfdir = ${docdir}
+prefix = /usr/local
+program_transform_name = s,x,x,
+psdir = ${docdir}
+sbindir = ${exec_prefix}/sbin
+sharedstatedir = ${prefix}/com
+srcdir = .
+sysconfdir = ${prefix}/etc
+target_alias = 
+top_builddir = ..
+top_srcdir = ..
+SUBDIRS = . tests docs
+valdir = $(libdir)/valgrind
+inplacedir = $(top_builddir)/.in_place
+
+# Baseline flags for all compilations.  Aim here is to maximise
+# performance and get whatever useful warnings we can out of gcc.
+AM_CFLAGS_BASE = -O2 -g -Wmissing-prototypes -Wall -Wshadow \
+                 -Wpointer-arith -Wstrict-prototypes -Wmissing-declarations \
+		 -Wno-format-zero-length \
+                 -fno-strict-aliasing
+
+
+# These flags are used for building the preload shared objects.
+# The aim is to give reasonable performance but also to have good
+# stack traces, since users often see stack traces extending 
+# into (and through) the preloads.
+AM_CFLAGS_PIC = -O -g -fpic -fno-omit-frame-pointer -fno-strict-aliasing
+
+# Flags for specific targets.
+#
+# Nb: the AM_CPPFLAGS_* values are suitable for building tools and auxprogs.
+# For building the core, coregrind/Makefile.am files add some extra things.
+#
+# Also: in newer versions of automake (1.10 onwards?) asm files ending with
+# '.S' are considered "pre-processed" (as opposed to those ending in '.s')
+# and so the CPPFLAGS are passed to the assembler.  But this is not true for
+# older automakes (e.g. 1.8.5, 1.9.6), sigh.  So we include
+# AM_CPPFLAGS_<PLATFORM> in each AM_CCASFLAGS_<PLATFORM> variable.  This
+# means some of the flags are duplicated on systems with newer versions of
+# automake, but this does not really matter and seems hard to avoid.
+AM_CPPFLAGS_COMMON = \
+		-I$(top_srcdir) \
+		-I$(top_srcdir)/include \
+		-I$(top_srcdir)/VEX/pub
+
+AM_FLAG_M3264_X86_LINUX = -m32
+AM_CPPFLAGS_X86_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_x86=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_x86_linux=1
+
+AM_CFLAGS_X86_LINUX = -m32  \
+			 	$(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_X86_LINUX = $(AM_CPPFLAGS_X86_LINUX) -m32 -g
+AM_FLAG_M3264_AMD64_LINUX = -m64
+AM_CPPFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_amd64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_amd64_linux=1
+
+AM_CFLAGS_AMD64_LINUX = -m64 -fomit-frame-pointer \
+				 $(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_AMD64_LINUX) -m64 -g
+AM_FLAG_M3264_PPC32_LINUX = -m32
+AM_CPPFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_COMMON) \
+		-DVGA_ppc32=1 \
+		-DVGO_linux=1 \
+		-DVGP_ppc32_linux=1
+
+AM_CFLAGS_PPC32_LINUX = -m32 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_PPC32_LINUX) -m32 -g
+AM_FLAG_M3264_PPC64_LINUX = -m64
+AM_CPPFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_ppc64_linux=1
+
+AM_CFLAGS_PPC64_LINUX = -m64 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_PPC64_LINUX) -m64 -g
+AM_FLAG_M3264_PPC32_AIX5 = 
+AM_CPPFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc32=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc32_aix5=1
+
+AM_CFLAGS_PPC32_AIX5 =  -mcpu=powerpc $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_PPC32_AIX5) \
+			     -mcpu=powerpc -g
+
+AM_FLAG_M3264_PPC64_AIX5 = 
+AM_CPPFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc64_aix5=1
+
+AM_CFLAGS_PPC64_AIX5 =  -mcpu=powerpc64 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_PPC64_AIX5) \
+			     -mcpu=powerpc64 -g
+
+
+# Flags for the primary target.  These must be used to build the
+# regtests and performance tests.  In fact, these must be used to
+# build anything which is built only once on a dual-arch build.
+#
+AM_FLAG_M3264_PRI = $(AM_FLAG_M3264_AMD64_LINUX)
+AM_CPPFLAGS_PRI = $(AM_CPPFLAGS_AMD64_LINUX)
+AM_CFLAGS_PRI = $(AM_CFLAGS_AMD64_LINUX)
+AM_CCASFLAGS_PRI = $(AM_CCASFLAGS_AMD64_LINUX)
+AM_FLAG_M3264_SEC = 
+#AM_FLAG_M3264_SEC = $(AM_FLAG_M3264_)
+
+# Baseline link flags for making dynamic shared objects.
+#
+PRELOAD_LDFLAGS_COMMON_LINUX = -nodefaultlibs -shared -Wl,-z,interpose,-z,initfirst
+PRELOAD_LDFLAGS_COMMON_AIX5 = -nodefaultlibs -shared -Wl,-G -Wl,-bnogc
+PRELOAD_LDFLAGS_X86_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m32
+PRELOAD_LDFLAGS_AMD64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m64
+PRELOAD_LDFLAGS_PPC32_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m32
+PRELOAD_LDFLAGS_PPC64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m64
+PRELOAD_LDFLAGS_PPC32_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  
+PRELOAD_LDFLAGS_PPC64_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  
+LIBREPLACEMALLOC_X86_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-x86-linux.a
+
+LIBREPLACEMALLOC_AMD64_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-amd64-linux.a
+
+LIBREPLACEMALLOC_PPC32_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc32-linux.a
+
+LIBREPLACEMALLOC_PPC64_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc64-linux.a
+
+LIBREPLACEMALLOC_PPC32_AIX5 = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc32-aix5.a
+
+LIBREPLACEMALLOC_PPC64_AIX5 = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc64-aix5.a
+
+COREGRIND_LIBS_X86_LINUX = \
+	$(top_builddir)/coregrind/libcoregrind-x86-linux.a \
+	$(top_srcdir)/VEX/libvex-x86-linux.a
+
+COREGRIND_LIBS_AMD64_LINUX = \
+	$(top_builddir)/coregrind/libcoregrind-amd64-linux.a \
+	$(top_srcdir)/VEX/libvex-amd64-linux.a
+
+COREGRIND_LIBS_PPC32_LINUX = \
+	$(top_builddir)/coregrind/libcoregrind-ppc32-linux.a \
+	$(top_srcdir)/VEX/libvex-ppc32-linux.a
+
+COREGRIND_LIBS_PPC64_LINUX = \
+	$(top_builddir)/coregrind/libcoregrind-ppc64-linux.a \
+	$(top_srcdir)/VEX/libvex-ppc64-linux.a
+
+COREGRIND_LIBS_PPC32_AIX5 = \
+	$(top_builddir)/coregrind/libcoregrind-ppc32-aix5.a \
+	$(top_srcdir)/VEX/libvex-ppc32-aix5.a
+
+COREGRIND_LIBS_PPC64_AIX5 = \
+	$(top_builddir)/coregrind/libcoregrind-ppc64-aix5.a \
+	$(top_srcdir)/VEX/libvex-ppc64-aix5.a
+
+TOOL_LDADD_COMMON = -lgcc
+TOOL_LDFLAGS_COMMON_LINUX = -static \
+	-Wl,-defsym,valt_load_address=0x38000000 \
+	-nodefaultlibs -nostartfiles -u _start
+
+TOOL_LDFLAGS_COMMON_AIX5 = -static -Wl,-e_start_valgrind
+TOOL_LDADD_X86_LINUX = $(COREGRIND_LIBS_X86_LINUX) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_X86_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) -m32 \
+	-Wl,-T,$(top_builddir)/valt_load_address_x86_linux.lds
+
+TOOL_LDADD_AMD64_LINUX = $(COREGRIND_LIBS_AMD64_LINUX) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_AMD64_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) -m64 \
+	-Wl,-T,$(top_builddir)/valt_load_address_amd64_linux.lds
+
+TOOL_LDADD_PPC32_LINUX = $(COREGRIND_LIBS_PPC32_LINUX) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_PPC32_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) -m32 \
+	-Wl,-T,$(top_builddir)/valt_load_address_ppc32_linux.lds
+
+TOOL_LDADD_PPC64_LINUX = $(COREGRIND_LIBS_PPC64_LINUX) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_PPC64_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) -m64 \
+	-Wl,-T,$(top_builddir)/valt_load_address_ppc64_linux.lds
+
+TOOL_LDADD_PPC32_AIX5 = $(COREGRIND_LIBS_PPC32_AIX5) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_PPC32_AIX5 = \
+	$(TOOL_LDFLAGS_COMMON_AIX5) 
+
+TOOL_LDADD_PPC64_AIX5 = $(COREGRIND_LIBS_PPC64_AIX5) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_PPC64_AIX5 = \
+	$(TOOL_LDFLAGS_COMMON_AIX5)  -Wl,-bbigtoc
+
+LIBREPLACEMALLOC_LDFLAGS_X86_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_X86_LINUX) \
+	-Wl,--no-whole-archive
+
+LIBREPLACEMALLOC_LDFLAGS_AMD64_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_AMD64_LINUX) \
+	-Wl,--no-whole-archive
+
+LIBREPLACEMALLOC_LDFLAGS_PPC32_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_PPC32_LINUX) \
+	-Wl,--no-whole-archive
+
+LIBREPLACEMALLOC_LDFLAGS_PPC64_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_PPC64_LINUX) \
+	-Wl,--no-whole-archive
+
+LIBREPLACEMALLOC_LDFLAGS_PPC32_AIX5 = \
+	$(LIBREPLACEMALLOC_PPC32_AIX5)
+
+LIBREPLACEMALLOC_LDFLAGS_PPC64_AIX5 = \
+	$(LIBREPLACEMALLOC_PPC64_AIX5)
+
+bin_SCRIPTS = cg_annotate
+noinst_HEADERS = cg_arch.h cg_sim.c cg_branchpred.c
+cg_merge_SOURCES = cg_merge.c
+cg_merge_CPPFLAGS = $(AM_CPPFLAGS_PRI)
+cg_merge_CFLAGS = $(AM_CFLAGS_PRI)
+cg_merge_CCASFLAGS = $(AM_CCASFLAGS_PRI)
+cg_merge_LDFLAGS = $(AM_CFLAGS_PRI)
+CACHEGRIND_SOURCES_COMMON = cg_main.c
+CACHEGRIND_SOURCES_X86 = cg-x86.c
+CACHEGRIND_SOURCES_AMD64 = cg-amd64.c
+CACHEGRIND_SOURCES_PPC32 = cg-ppc32.c
+CACHEGRIND_SOURCES_PPC64 = cg-ppc64.c
+cachegrind_x86_linux_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_X86)
+cachegrind_x86_linux_CPPFLAGS = $(AM_CPPFLAGS_X86_LINUX)
+cachegrind_x86_linux_CFLAGS = $(AM_CFLAGS_X86_LINUX)
+cachegrind_x86_linux_DEPENDENCIES = $(COREGRIND_LIBS_X86_LINUX)
+cachegrind_x86_linux_LDADD = $(TOOL_LDADD_X86_LINUX)
+cachegrind_x86_linux_LDFLAGS = $(TOOL_LDFLAGS_X86_LINUX)
+cachegrind_amd64_linux_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_AMD64)
+cachegrind_amd64_linux_CPPFLAGS = $(AM_CPPFLAGS_AMD64_LINUX)
+cachegrind_amd64_linux_CFLAGS = $(AM_CFLAGS_AMD64_LINUX)
+cachegrind_amd64_linux_DEPENDENCIES = $(COREGRIND_LIBS_AMD64_LINUX)
+cachegrind_amd64_linux_LDADD = $(TOOL_LDADD_AMD64_LINUX)
+cachegrind_amd64_linux_LDFLAGS = $(TOOL_LDFLAGS_AMD64_LINUX)
+cachegrind_ppc32_linux_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC32)
+cachegrind_ppc32_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC32_LINUX)
+cachegrind_ppc32_linux_CFLAGS = $(AM_CFLAGS_PPC32_LINUX)
+cachegrind_ppc32_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_LINUX)
+cachegrind_ppc32_linux_LDADD = $(TOOL_LDADD_PPC32_LINUX)
+cachegrind_ppc32_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC32_LINUX)
+cachegrind_ppc64_linux_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC64)
+cachegrind_ppc64_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC64_LINUX)
+cachegrind_ppc64_linux_CFLAGS = $(AM_CFLAGS_PPC64_LINUX)
+cachegrind_ppc64_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_LINUX)
+cachegrind_ppc64_linux_LDADD = $(TOOL_LDADD_PPC64_LINUX)
+cachegrind_ppc64_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC64_LINUX)
+cachegrind_ppc32_aix5_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC32)
+cachegrind_ppc32_aix5_CPPFLAGS = $(AM_CPPFLAGS_PPC32_AIX5)
+cachegrind_ppc32_aix5_CFLAGS = $(AM_CFLAGS_PPC32_AIX5)
+cachegrind_ppc32_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_AIX5)
+cachegrind_ppc32_aix5_LDADD = $(TOOL_LDADD_PPC32_AIX5)
+cachegrind_ppc32_aix5_LDFLAGS = $(TOOL_LDFLAGS_PPC32_AIX5)
+cachegrind_ppc64_aix5_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC64)
+cachegrind_ppc64_aix5_CPPFLAGS = $(AM_CPPFLAGS_PPC64_AIX5)
+cachegrind_ppc64_aix5_CFLAGS = $(AM_CFLAGS_PPC64_AIX5)
+cachegrind_ppc64_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_AIX5)
+cachegrind_ppc64_aix5_LDADD = $(TOOL_LDADD_PPC64_AIX5)
+cachegrind_ppc64_aix5_LDFLAGS = $(TOOL_LDFLAGS_PPC64_AIX5)
+all: all-recursive
+
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(top_srcdir)/Makefile.tool.am $(top_srcdir)/Makefile.all.am $(top_srcdir)/Makefile.flags.am $(top_srcdir)/Makefile.core-tool.am $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  cachegrind/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  cachegrind/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status  # Regenerate this Makefile: if config.status is among the newer prerequisites, go through the top-level am--refresh; otherwise just rerun config.status for this file.
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: # $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): # $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+cg_annotate: $(top_builddir)/config.status $(srcdir)/cg_annotate.in  # Generate the cg_annotate script by running config.status for it from the top build directory.
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+install-binPROGRAMS: $(bin_PROGRAMS)  # Create the destination bin directory, then install every bin_PROGRAMS entry that exists as a file, renamed through the transform sed program with EXEEXT re-appended; abort on the first failed install.
+	@$(NORMAL_INSTALL)
+	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+	  if test -f $$p \
+	  ; then \
+	    f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
+	   echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
+	   $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
+	  else :; fi; \
+	done
+
+uninstall-binPROGRAMS:  # Remove each bin_PROGRAMS entry from the destination bin directory, using the same name transformation as the install rule.
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
+	  echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(bindir)/$$f"; \
+	done
+
+clean-binPROGRAMS:  # Delete the built bin_PROGRAMS, if any; errors are ignored (leading dash).
+	-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+
+clean-noinstPROGRAMS:  # Delete the built noinst_PROGRAMS, if any; errors are ignored (leading dash).
+	-test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS)
+cachegrind-amd64-linux$(EXEEXT): $(cachegrind_amd64_linux_OBJECTS) $(cachegrind_amd64_linux_DEPENDENCIES) # Relink from scratch: drop the old binary, then link the objects with LDADD and LIBS.
+	@rm -f cachegrind-amd64-linux$(EXEEXT)
+	$(cachegrind_amd64_linux_LINK) $(cachegrind_amd64_linux_OBJECTS) $(cachegrind_amd64_linux_LDADD) $(LIBS)
+cachegrind-ppc32-aix5$(EXEEXT): $(cachegrind_ppc32_aix5_OBJECTS) $(cachegrind_ppc32_aix5_DEPENDENCIES) # Relink from scratch: drop the old binary, then link the objects with LDADD and LIBS.
+	@rm -f cachegrind-ppc32-aix5$(EXEEXT)
+	$(cachegrind_ppc32_aix5_LINK) $(cachegrind_ppc32_aix5_OBJECTS) $(cachegrind_ppc32_aix5_LDADD) $(LIBS)
+cachegrind-ppc32-linux$(EXEEXT): $(cachegrind_ppc32_linux_OBJECTS) $(cachegrind_ppc32_linux_DEPENDENCIES) # Relink from scratch: drop the old binary, then link the objects with LDADD and LIBS.
+	@rm -f cachegrind-ppc32-linux$(EXEEXT)
+	$(cachegrind_ppc32_linux_LINK) $(cachegrind_ppc32_linux_OBJECTS) $(cachegrind_ppc32_linux_LDADD) $(LIBS)
+cachegrind-ppc64-aix5$(EXEEXT): $(cachegrind_ppc64_aix5_OBJECTS) $(cachegrind_ppc64_aix5_DEPENDENCIES) # Relink from scratch: drop the old binary, then link the objects with LDADD and LIBS.
+	@rm -f cachegrind-ppc64-aix5$(EXEEXT)
+	$(cachegrind_ppc64_aix5_LINK) $(cachegrind_ppc64_aix5_OBJECTS) $(cachegrind_ppc64_aix5_LDADD) $(LIBS)
+cachegrind-ppc64-linux$(EXEEXT): $(cachegrind_ppc64_linux_OBJECTS) $(cachegrind_ppc64_linux_DEPENDENCIES) # Relink from scratch: drop the old binary, then link the objects with LDADD and LIBS.
+	@rm -f cachegrind-ppc64-linux$(EXEEXT)
+	$(cachegrind_ppc64_linux_LINK) $(cachegrind_ppc64_linux_OBJECTS) $(cachegrind_ppc64_linux_LDADD) $(LIBS)
+cachegrind-x86-linux$(EXEEXT): $(cachegrind_x86_linux_OBJECTS) $(cachegrind_x86_linux_DEPENDENCIES) # Relink from scratch: drop the old binary, then link the objects with LDADD and LIBS.
+	@rm -f cachegrind-x86-linux$(EXEEXT)
+	$(cachegrind_x86_linux_LINK) $(cachegrind_x86_linux_OBJECTS) $(cachegrind_x86_linux_LDADD) $(LIBS)
+cg_merge$(EXEEXT): $(cg_merge_OBJECTS) $(cg_merge_DEPENDENCIES) # Relink from scratch: drop the old binary, then link the objects with LDADD and LIBS.
+	@rm -f cg_merge$(EXEEXT)
+	$(cg_merge_LINK) $(cg_merge_OBJECTS) $(cg_merge_LDADD) $(LIBS)
+install-binSCRIPTS: $(bin_SCRIPTS)  # Install each bin_SCRIPTS entry, taken from the build directory if present there and from srcdir otherwise, into the destination bin directory under its transformed name; missing scripts are skipped.
+	@$(NORMAL_INSTALL)
+	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+	@list='$(bin_SCRIPTS)'; for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  if test -f $$d$$p; then \
+	    f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
+	    echo " $(binSCRIPT_INSTALL) '$$d$$p' '$(DESTDIR)$(bindir)/$$f'"; \
+	    $(binSCRIPT_INSTALL) "$$d$$p" "$(DESTDIR)$(bindir)/$$f"; \
+	  else :; fi; \
+	done
+
+uninstall-binSCRIPTS:  # Remove each bin_SCRIPTS entry from the destination bin directory, using the same name transformation as the install rule.
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_SCRIPTS)'; for p in $$list; do \
+	  f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
+	  echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(bindir)/$$f"; \
+	done
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+include ./$(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Po
+include ./$(DEPDIR)/cachegrind_amd64_linux-cg_main.Po
+include ./$(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Po
+include ./$(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Po
+include ./$(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Po
+include ./$(DEPDIR)/cachegrind_ppc32_linux-cg_main.Po
+include ./$(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Po
+include ./$(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Po
+include ./$(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Po
+include ./$(DEPDIR)/cachegrind_ppc64_linux-cg_main.Po
+include ./$(DEPDIR)/cachegrind_x86_linux-cg-x86.Po
+include ./$(DEPDIR)/cachegrind_x86_linux-cg_main.Po
+include ./$(DEPDIR)/cg_merge-cg_merge.Po
+
+.c.o:  # Compile a C file and emit its dependency list into a .Tpo file, renamed to .Po once the compile has succeeded; the commented-out lines below are the depcomp-based alternative.
+	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+#	source='$<' object='$@' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(COMPILE) -c $<
+
+.c.obj:  # Same as the .c.o rule but for .obj outputs, with the source path passed through CYGPATH_W; the commented-out lines below are the depcomp-based alternative.
+	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+#	source='$<' object='$@' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+cachegrind_amd64_linux-cg_main.o: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_amd64_linux-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_amd64_linux-cg_main.Tpo -c -o cachegrind_amd64_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+	mv -f $(DEPDIR)/cachegrind_amd64_linux-cg_main.Tpo $(DEPDIR)/cachegrind_amd64_linux-cg_main.Po
+#	source='cg_main.c' object='cachegrind_amd64_linux-cg_main.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_amd64_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_amd64_linux-cg_main.obj: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_amd64_linux-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_amd64_linux-cg_main.Tpo -c -o cachegrind_amd64_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_amd64_linux-cg_main.Tpo $(DEPDIR)/cachegrind_amd64_linux-cg_main.Po
+#	source='cg_main.c' object='cachegrind_amd64_linux-cg_main.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_amd64_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_amd64_linux-cg-amd64.o: cg-amd64.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_amd64_linux-cg-amd64.o -MD -MP -MF $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Tpo -c -o cachegrind_amd64_linux-cg-amd64.o `test -f 'cg-amd64.c' || echo '$(srcdir)/'`cg-amd64.c
+	mv -f $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Tpo $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Po
+#	source='cg-amd64.c' object='cachegrind_amd64_linux-cg-amd64.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_amd64_linux-cg-amd64.o `test -f 'cg-amd64.c' || echo '$(srcdir)/'`cg-amd64.c
+
+cachegrind_amd64_linux-cg-amd64.obj: cg-amd64.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_amd64_linux-cg-amd64.obj -MD -MP -MF $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Tpo -c -o cachegrind_amd64_linux-cg-amd64.obj `if test -f 'cg-amd64.c'; then $(CYGPATH_W) 'cg-amd64.c'; else $(CYGPATH_W) '$(srcdir)/cg-amd64.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Tpo $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Po
+#	source='cg-amd64.c' object='cachegrind_amd64_linux-cg-amd64.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_amd64_linux-cg-amd64.obj `if test -f 'cg-amd64.c'; then $(CYGPATH_W) 'cg-amd64.c'; else $(CYGPATH_W) '$(srcdir)/cg-amd64.c'; fi`
+
+cachegrind_ppc32_aix5-cg_main.o: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_aix5-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Tpo -c -o cachegrind_ppc32_aix5-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+	mv -f $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Tpo $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Po
+#	source='cg_main.c' object='cachegrind_ppc32_aix5-cg_main.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_aix5-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_ppc32_aix5-cg_main.obj: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_aix5-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Tpo -c -o cachegrind_ppc32_aix5-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Tpo $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Po
+#	source='cg_main.c' object='cachegrind_ppc32_aix5-cg_main.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_aix5-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_ppc32_aix5-cg-ppc32.o: cg-ppc32.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_aix5-cg-ppc32.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Tpo -c -o cachegrind_ppc32_aix5-cg-ppc32.o `test -f 'cg-ppc32.c' || echo '$(srcdir)/'`cg-ppc32.c
+	mv -f $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Tpo $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Po
+#	source='cg-ppc32.c' object='cachegrind_ppc32_aix5-cg-ppc32.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_aix5-cg-ppc32.o `test -f 'cg-ppc32.c' || echo '$(srcdir)/'`cg-ppc32.c
+
+cachegrind_ppc32_aix5-cg-ppc32.obj: cg-ppc32.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_aix5-cg-ppc32.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Tpo -c -o cachegrind_ppc32_aix5-cg-ppc32.obj `if test -f 'cg-ppc32.c'; then $(CYGPATH_W) 'cg-ppc32.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc32.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Tpo $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Po
+#	source='cg-ppc32.c' object='cachegrind_ppc32_aix5-cg-ppc32.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_aix5-cg-ppc32.obj `if test -f 'cg-ppc32.c'; then $(CYGPATH_W) 'cg-ppc32.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc32.c'; fi`
+
+cachegrind_ppc32_linux-cg_main.o: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_linux-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Tpo -c -o cachegrind_ppc32_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+	mv -f $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Tpo $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Po
+#	source='cg_main.c' object='cachegrind_ppc32_linux-cg_main.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_ppc32_linux-cg_main.obj: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_linux-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Tpo -c -o cachegrind_ppc32_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Tpo $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Po
+#	source='cg_main.c' object='cachegrind_ppc32_linux-cg_main.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_ppc32_linux-cg-ppc32.o: cg-ppc32.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_linux-cg-ppc32.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Tpo -c -o cachegrind_ppc32_linux-cg-ppc32.o `test -f 'cg-ppc32.c' || echo '$(srcdir)/'`cg-ppc32.c
+	mv -f $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Tpo $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Po
+#	source='cg-ppc32.c' object='cachegrind_ppc32_linux-cg-ppc32.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_linux-cg-ppc32.o `test -f 'cg-ppc32.c' || echo '$(srcdir)/'`cg-ppc32.c
+
+cachegrind_ppc32_linux-cg-ppc32.obj: cg-ppc32.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_linux-cg-ppc32.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Tpo -c -o cachegrind_ppc32_linux-cg-ppc32.obj `if test -f 'cg-ppc32.c'; then $(CYGPATH_W) 'cg-ppc32.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc32.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Tpo $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Po
+#	source='cg-ppc32.c' object='cachegrind_ppc32_linux-cg-ppc32.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_linux-cg-ppc32.obj `if test -f 'cg-ppc32.c'; then $(CYGPATH_W) 'cg-ppc32.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc32.c'; fi`
+
+cachegrind_ppc64_aix5-cg_main.o: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_aix5-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Tpo -c -o cachegrind_ppc64_aix5-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+	mv -f $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Tpo $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Po
+#	source='cg_main.c' object='cachegrind_ppc64_aix5-cg_main.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_aix5-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_ppc64_aix5-cg_main.obj: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_aix5-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Tpo -c -o cachegrind_ppc64_aix5-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Tpo $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Po
+#	source='cg_main.c' object='cachegrind_ppc64_aix5-cg_main.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_aix5-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_ppc64_aix5-cg-ppc64.o: cg-ppc64.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_aix5-cg-ppc64.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Tpo -c -o cachegrind_ppc64_aix5-cg-ppc64.o `test -f 'cg-ppc64.c' || echo '$(srcdir)/'`cg-ppc64.c
+	mv -f $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Tpo $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Po
+#	source='cg-ppc64.c' object='cachegrind_ppc64_aix5-cg-ppc64.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_aix5-cg-ppc64.o `test -f 'cg-ppc64.c' || echo '$(srcdir)/'`cg-ppc64.c
+
+cachegrind_ppc64_aix5-cg-ppc64.obj: cg-ppc64.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_aix5-cg-ppc64.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Tpo -c -o cachegrind_ppc64_aix5-cg-ppc64.obj `if test -f 'cg-ppc64.c'; then $(CYGPATH_W) 'cg-ppc64.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc64.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Tpo $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Po
+#	source='cg-ppc64.c' object='cachegrind_ppc64_aix5-cg-ppc64.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_aix5-cg-ppc64.obj `if test -f 'cg-ppc64.c'; then $(CYGPATH_W) 'cg-ppc64.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc64.c'; fi`
+
+cachegrind_ppc64_linux-cg_main.o: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_linux-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Tpo -c -o cachegrind_ppc64_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+	mv -f $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Tpo $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Po
+#	source='cg_main.c' object='cachegrind_ppc64_linux-cg_main.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_ppc64_linux-cg_main.obj: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_linux-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Tpo -c -o cachegrind_ppc64_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Tpo $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Po
+#	source='cg_main.c' object='cachegrind_ppc64_linux-cg_main.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_ppc64_linux-cg-ppc64.o: cg-ppc64.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_linux-cg-ppc64.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Tpo -c -o cachegrind_ppc64_linux-cg-ppc64.o `test -f 'cg-ppc64.c' || echo '$(srcdir)/'`cg-ppc64.c
+	mv -f $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Tpo $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Po
+#	source='cg-ppc64.c' object='cachegrind_ppc64_linux-cg-ppc64.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_linux-cg-ppc64.o `test -f 'cg-ppc64.c' || echo '$(srcdir)/'`cg-ppc64.c
+
+cachegrind_ppc64_linux-cg-ppc64.obj: cg-ppc64.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_linux-cg-ppc64.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Tpo -c -o cachegrind_ppc64_linux-cg-ppc64.obj `if test -f 'cg-ppc64.c'; then $(CYGPATH_W) 'cg-ppc64.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc64.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Tpo $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Po
+#	source='cg-ppc64.c' object='cachegrind_ppc64_linux-cg-ppc64.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_linux-cg-ppc64.obj `if test -f 'cg-ppc64.c'; then $(CYGPATH_W) 'cg-ppc64.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc64.c'; fi`
+
+cachegrind_x86_linux-cg_main.o: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -MT cachegrind_x86_linux-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_x86_linux-cg_main.Tpo -c -o cachegrind_x86_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+	mv -f $(DEPDIR)/cachegrind_x86_linux-cg_main.Tpo $(DEPDIR)/cachegrind_x86_linux-cg_main.Po
+#	source='cg_main.c' object='cachegrind_x86_linux-cg_main.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_x86_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_x86_linux-cg_main.obj: cg_main.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -MT cachegrind_x86_linux-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_x86_linux-cg_main.Tpo -c -o cachegrind_x86_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_x86_linux-cg_main.Tpo $(DEPDIR)/cachegrind_x86_linux-cg_main.Po
+#	source='cg_main.c' object='cachegrind_x86_linux-cg_main.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_x86_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_x86_linux-cg-x86.o: cg-x86.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -MT cachegrind_x86_linux-cg-x86.o -MD -MP -MF $(DEPDIR)/cachegrind_x86_linux-cg-x86.Tpo -c -o cachegrind_x86_linux-cg-x86.o `test -f 'cg-x86.c' || echo '$(srcdir)/'`cg-x86.c
+	mv -f $(DEPDIR)/cachegrind_x86_linux-cg-x86.Tpo $(DEPDIR)/cachegrind_x86_linux-cg-x86.Po
+#	source='cg-x86.c' object='cachegrind_x86_linux-cg-x86.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_x86_linux-cg-x86.o `test -f 'cg-x86.c' || echo '$(srcdir)/'`cg-x86.c
+
+cachegrind_x86_linux-cg-x86.obj: cg-x86.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -MT cachegrind_x86_linux-cg-x86.obj -MD -MP -MF $(DEPDIR)/cachegrind_x86_linux-cg-x86.Tpo -c -o cachegrind_x86_linux-cg-x86.obj `if test -f 'cg-x86.c'; then $(CYGPATH_W) 'cg-x86.c'; else $(CYGPATH_W) '$(srcdir)/cg-x86.c'; fi`
+	mv -f $(DEPDIR)/cachegrind_x86_linux-cg-x86.Tpo $(DEPDIR)/cachegrind_x86_linux-cg-x86.Po
+#	source='cg-x86.c' object='cachegrind_x86_linux-cg-x86.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_x86_linux-cg-x86.obj `if test -f 'cg-x86.c'; then $(CYGPATH_W) 'cg-x86.c'; else $(CYGPATH_W) '$(srcdir)/cg-x86.c'; fi`
+
+cg_merge-cg_merge.o: cg_merge.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cg_merge_CPPFLAGS) $(CPPFLAGS) $(cg_merge_CFLAGS) $(CFLAGS) -MT cg_merge-cg_merge.o -MD -MP -MF $(DEPDIR)/cg_merge-cg_merge.Tpo -c -o cg_merge-cg_merge.o `test -f 'cg_merge.c' || echo '$(srcdir)/'`cg_merge.c
+	mv -f $(DEPDIR)/cg_merge-cg_merge.Tpo $(DEPDIR)/cg_merge-cg_merge.Po
+#	source='cg_merge.c' object='cg_merge-cg_merge.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cg_merge_CPPFLAGS) $(CPPFLAGS) $(cg_merge_CFLAGS) $(CFLAGS) -c -o cg_merge-cg_merge.o `test -f 'cg_merge.c' || echo '$(srcdir)/'`cg_merge.c
+
+cg_merge-cg_merge.obj: cg_merge.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cg_merge_CPPFLAGS) $(CPPFLAGS) $(cg_merge_CFLAGS) $(CFLAGS) -MT cg_merge-cg_merge.obj -MD -MP -MF $(DEPDIR)/cg_merge-cg_merge.Tpo -c -o cg_merge-cg_merge.obj `if test -f 'cg_merge.c'; then $(CYGPATH_W) 'cg_merge.c'; else $(CYGPATH_W) '$(srcdir)/cg_merge.c'; fi`
+	mv -f $(DEPDIR)/cg_merge-cg_merge.Tpo $(DEPDIR)/cg_merge-cg_merge.Po
+#	source='cg_merge.c' object='cg_merge-cg_merge.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cg_merge_CPPFLAGS) $(CPPFLAGS) $(cg_merge_CFLAGS) $(CFLAGS) -c -o cg_merge-cg_merge.obj `if test -f 'cg_merge.c'; then $(CYGPATH_W) 'cg_merge.c'; else $(CYGPATH_W) '$(srcdir)/cg_merge.c'; fi`
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+#     (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):  # Run the target (name minus -recursive) in every SUBDIRS entry; the entry "." maps to the local -am target, which is also run last when "." is not listed. If a k flag is found in MAKEFLAGS, failures are recorded and reported at the end instead of aborting at once.
+	@failcom='exit 1'; \
+	for f in x $$MAKEFLAGS; do \
+	  case $$f in \
+	    *=* | --[!k]*);; \
+	    *k*) failcom='fail=yes';; \
+	  esac; \
+	done; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):  # Run the clean target (name minus -recursive) over the subdirectories in reverse order with "." last; distclean-* and maintainer-clean-* walk DIST_SUBDIRS, everything else SUBDIRS. If a k flag is found in MAKEFLAGS, failures are recorded and reported at the end.
+	@failcom='exit 1'; \
+	for f in x $$MAKEFLAGS; do \
+	  case $$f in \
+	    *=* | --[!k]*);; \
+	    *k*) failcom='fail=yes';; \
+	  esac; \
+	done; \
+	dot_seen=no; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	rev=''; for subdir in $$list; do \
+	  if test "$$subdir" = "."; then :; else \
+	    rev="$$subdir $$rev"; \
+	  fi; \
+	done; \
+	rev="$$rev ."; \
+	target=`echo $@ | sed s/-recursive//`; \
+	for subdir in $$rev; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done && test -z "$$fail"
+tags-recursive:  # Run the tags target in every SUBDIRS entry other than ".".
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+	done
+ctags-recursive:  # Run the ctags target in every SUBDIRS entry other than ".".
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+	done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+	list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test -d "$(distdir)/$$subdir" \
+	    || $(MKDIR_P) "$(distdir)/$$subdir" \
+	    || exit 1; \
+	    distdir=`$(am__cd) $(distdir) && pwd`; \
+	    top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
+	    (cd $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$top_distdir" \
+	        distdir="$$distdir/$$subdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-recursive
+all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(HEADERS) all-local
+installdirs: installdirs-recursive
+installdirs-am:
+	for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \
+	mostlyclean-am
+
+distclean: distclean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-exec-am: install-binPROGRAMS install-binSCRIPTS \
+	install-exec-local
+
+install-html: install-html-recursive
+
+install-info: install-info-recursive
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-ps: install-ps-recursive
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS
+
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
+	install-strip
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+	all all-am all-local check check-am clean clean-binPROGRAMS \
+	clean-generic clean-noinstPROGRAMS ctags ctags-recursive \
+	distclean distclean-compile distclean-generic distclean-tags \
+	distdir dvi dvi-am html html-am info info-am install \
+	install-am install-binPROGRAMS install-binSCRIPTS install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-exec-local install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	installdirs-am maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \
+	ps ps-am tags tags-recursive uninstall uninstall-am \
+	uninstall-binPROGRAMS uninstall-binSCRIPTS
+
+
+# The kludge that passes for vex's build system can't handle parallel
+# builds.  So, for the time being, serialise all Valgrind building.
+# (This is equivalent to enforcing "make -j 1".)
+.NOTPARALLEL:
+
+# This is used by coregrind/Makefile.am and Makefile.tool.am for doing
+# "in-place" installs.  It symlinks $(noinst_PROGRAMS) into $(inplacedir)
+# (no files are copied).  It needs to be depended on by an 'all-local' rule.
+inplace-noinst_PROGRAMS:
+	if [ -n "$(noinst_PROGRAMS)" ] ; then \
+	  mkdir -p $(inplacedir); \
+	  for f in $(noinst_PROGRAMS) ; do \
+	    rm -f $(inplacedir)/$$f; \
+	    ln -f -s ../$(subdir)/$$f $(inplacedir); \
+	  done ; \
+	fi
+
+# This is used by coregrind/Makefile.am and by <tool>/Makefile.am for doing
+# "make install".  It copies $(noinst_PROGRAMS) into $(DESTDIR)$(valdir),
+# failing on the first error.  An 'install-exec-local' rule must depend on it.
+install-noinst_PROGRAMS:
+	if [ -n "$(noinst_PROGRAMS)" ] ; then \
+	  $(mkinstalldirs) $(DESTDIR)$(valdir) || exit 1; \
+	  for f in $(noinst_PROGRAMS); do \
+	    $(INSTALL_PROGRAM) $$f $(DESTDIR)$(valdir) || exit 1; \
+	  done ; \
+	fi
+
+$(top_srcdir)/VEX/libvex-x86-linux.a: $(top_srcdir)/VEX/priv/main/vex_svnversion.h
+	$(MAKE) -C $(top_srcdir)/VEX CC="$(CC)" AR="$(AR)" \
+	libvex-x86-linux.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_X86_LINUX) -Wdeclaration-after-statement \
+			-fno-stack-protector"
+
+$(top_srcdir)/VEX/libvex-amd64-linux.a: $(top_srcdir)/VEX/priv/main/vex_svnversion.h
+	$(MAKE) -C $(top_srcdir)/VEX CC="$(CC)" AR="$(AR)" \
+	libvex-amd64-linux.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_AMD64_LINUX) -Wdeclaration-after-statement \
+			-fno-stack-protector"
+
+$(top_srcdir)/VEX/libvex-ppc32-linux.a: $(top_srcdir)/VEX/priv/main/vex_svnversion.h
+	$(MAKE) -C $(top_srcdir)/VEX CC="$(CC)" AR="$(AR)" \
+	libvex-ppc32-linux.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_PPC32_LINUX) -Wdeclaration-after-statement \
+			-fno-stack-protector"
+
+$(top_srcdir)/VEX/libvex-ppc64-linux.a: $(top_srcdir)/VEX/priv/main/vex_svnversion.h
+	$(MAKE) -C $(top_srcdir)/VEX CC="$(CC)" AR="$(AR)" \
+	libvex-ppc64-linux.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_PPC64_LINUX) -Wdeclaration-after-statement \
+			-fno-stack-protector"
+
+$(top_srcdir)/VEX/libvex-ppc32-aix5.a: $(top_srcdir)/VEX/priv/main/vex_svnversion.h
+	$(MAKE) -C $(top_srcdir)/VEX CC="$(CC)" AR="$(AR) -X32" \
+	libvex-ppc32-aix5.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_PPC32_AIX5) -Wdeclaration-after-statement \
+			-fno-stack-protector"
+
+$(top_srcdir)/VEX/libvex-ppc64-aix5.a: $(top_srcdir)/VEX/priv/main/vex_svnversion.h
+	$(MAKE) -C $(top_srcdir)/VEX CC="$(CC)" AR="$(AR) -X64" \
+	libvex-ppc64-aix5.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_PPC64_AIX5) -Wdeclaration-after-statement \
+			-fno-stack-protector"
+
+$(top_srcdir)/VEX/priv/main/vex_svnversion.h:
+	$(MAKE) -C $(top_srcdir)/VEX CC="$(CC)" version
+
+all-local: inplace-noinst_PROGRAMS
+
+install-exec-local: install-noinst_PROGRAMS
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/cachegrind/Makefile.am b/cachegrind/Makefile.am
new file mode 100644
index 0000000..eac2825
--- /dev/null
+++ b/cachegrind/Makefile.am
@@ -0,0 +1,82 @@
+include $(top_srcdir)/Makefile.tool.am
+
+bin_SCRIPTS = cg_annotate
+
+noinst_HEADERS = cg_arch.h cg_sim.c cg_branchpred.c
+
+noinst_PROGRAMS = 
+if VGCONF_PLATFORMS_INCLUDE_X86_LINUX
+noinst_PROGRAMS += cachegrind-x86-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_AMD64_LINUX
+noinst_PROGRAMS += cachegrind-amd64-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_PPC32_LINUX
+noinst_PROGRAMS += cachegrind-ppc32-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_PPC64_LINUX
+noinst_PROGRAMS += cachegrind-ppc64-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_PPC32_AIX5
+noinst_PROGRAMS += cachegrind-ppc32-aix5
+endif
+if VGCONF_PLATFORMS_INCLUDE_PPC64_AIX5
+noinst_PROGRAMS += cachegrind-ppc64-aix5
+endif
+
+# Build cg_merge for the primary target only.
+bin_PROGRAMS = cg_merge
+cg_merge_SOURCES = cg_merge.c
+cg_merge_CPPFLAGS  = $(AM_CPPFLAGS_PRI)
+cg_merge_CFLAGS    = $(AM_CFLAGS_PRI)
+cg_merge_CCASFLAGS = $(AM_CCASFLAGS_PRI)
+cg_merge_LDFLAGS   = $(AM_CFLAGS_PRI)
+
+
+CACHEGRIND_SOURCES_COMMON = cg_main.c
+CACHEGRIND_SOURCES_X86 = cg-x86.c
+CACHEGRIND_SOURCES_AMD64 = cg-amd64.c
+CACHEGRIND_SOURCES_PPC32 = cg-ppc32.c
+CACHEGRIND_SOURCES_PPC64 = cg-ppc64.c
+
+cachegrind_x86_linux_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_X86)
+cachegrind_x86_linux_CPPFLAGS     = $(AM_CPPFLAGS_X86_LINUX)
+cachegrind_x86_linux_CFLAGS       = $(AM_CFLAGS_X86_LINUX)
+cachegrind_x86_linux_DEPENDENCIES = $(COREGRIND_LIBS_X86_LINUX)
+cachegrind_x86_linux_LDADD        = $(TOOL_LDADD_X86_LINUX)
+cachegrind_x86_linux_LDFLAGS      = $(TOOL_LDFLAGS_X86_LINUX)
+
+cachegrind_amd64_linux_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_AMD64)
+cachegrind_amd64_linux_CPPFLAGS     = $(AM_CPPFLAGS_AMD64_LINUX)
+cachegrind_amd64_linux_CFLAGS       = $(AM_CFLAGS_AMD64_LINUX)
+cachegrind_amd64_linux_DEPENDENCIES = $(COREGRIND_LIBS_AMD64_LINUX)
+cachegrind_amd64_linux_LDADD        = $(TOOL_LDADD_AMD64_LINUX)
+cachegrind_amd64_linux_LDFLAGS      = $(TOOL_LDFLAGS_AMD64_LINUX)
+
+cachegrind_ppc32_linux_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC32)
+cachegrind_ppc32_linux_CPPFLAGS     = $(AM_CPPFLAGS_PPC32_LINUX)
+cachegrind_ppc32_linux_CFLAGS       = $(AM_CFLAGS_PPC32_LINUX)
+cachegrind_ppc32_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_LINUX)
+cachegrind_ppc32_linux_LDADD        = $(TOOL_LDADD_PPC32_LINUX)
+cachegrind_ppc32_linux_LDFLAGS      = $(TOOL_LDFLAGS_PPC32_LINUX)
+
+cachegrind_ppc64_linux_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC64)
+cachegrind_ppc64_linux_CPPFLAGS     = $(AM_CPPFLAGS_PPC64_LINUX)
+cachegrind_ppc64_linux_CFLAGS       = $(AM_CFLAGS_PPC64_LINUX)
+cachegrind_ppc64_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_LINUX)
+cachegrind_ppc64_linux_LDADD        = $(TOOL_LDADD_PPC64_LINUX)
+cachegrind_ppc64_linux_LDFLAGS      = $(TOOL_LDFLAGS_PPC64_LINUX)
+
+cachegrind_ppc32_aix5_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC32)
+cachegrind_ppc32_aix5_CPPFLAGS     = $(AM_CPPFLAGS_PPC32_AIX5)
+cachegrind_ppc32_aix5_CFLAGS       = $(AM_CFLAGS_PPC32_AIX5)
+cachegrind_ppc32_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_AIX5)
+cachegrind_ppc32_aix5_LDADD        = $(TOOL_LDADD_PPC32_AIX5)
+cachegrind_ppc32_aix5_LDFLAGS      = $(TOOL_LDFLAGS_PPC32_AIX5)
+
+cachegrind_ppc64_aix5_SOURCES      = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC64)
+cachegrind_ppc64_aix5_CPPFLAGS     = $(AM_CPPFLAGS_PPC64_AIX5)
+cachegrind_ppc64_aix5_CFLAGS       = $(AM_CFLAGS_PPC64_AIX5)
+cachegrind_ppc64_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_AIX5)
+cachegrind_ppc64_aix5_LDADD        = $(TOOL_LDADD_PPC64_AIX5)
+cachegrind_ppc64_aix5_LDFLAGS      = $(TOOL_LDFLAGS_PPC64_AIX5)
diff --git a/cachegrind/Makefile.in b/cachegrind/Makefile.in
new file mode 100644
index 0000000..d210531
--- /dev/null
+++ b/cachegrind/Makefile.in
@@ -0,0 +1,1229 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# This file contains things shared by coregrind/Makefile.am and tool
+# Makefile.am files.
+
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \
+	$(srcdir)/Makefile.in $(srcdir)/cg_annotate.in \
+	$(top_srcdir)/Makefile.all.am \
+	$(top_srcdir)/Makefile.core-tool.am \
+	$(top_srcdir)/Makefile.flags.am $(top_srcdir)/Makefile.tool.am
+noinst_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \
+	$(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6)
+@VGCONF_PLATFORMS_INCLUDE_X86_LINUX_TRUE@am__append_1 = cachegrind-x86-linux
+@VGCONF_PLATFORMS_INCLUDE_AMD64_LINUX_TRUE@am__append_2 = cachegrind-amd64-linux
+@VGCONF_PLATFORMS_INCLUDE_PPC32_LINUX_TRUE@am__append_3 = cachegrind-ppc32-linux
+@VGCONF_PLATFORMS_INCLUDE_PPC64_LINUX_TRUE@am__append_4 = cachegrind-ppc64-linux
+@VGCONF_PLATFORMS_INCLUDE_PPC32_AIX5_TRUE@am__append_5 = cachegrind-ppc32-aix5
+@VGCONF_PLATFORMS_INCLUDE_PPC64_AIX5_TRUE@am__append_6 = cachegrind-ppc64-aix5
+bin_PROGRAMS = cg_merge$(EXEEXT)
+subdir = cachegrind
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES = cg_annotate
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"
+binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+@VGCONF_PLATFORMS_INCLUDE_X86_LINUX_TRUE@am__EXEEXT_1 = cachegrind-x86-linux$(EXEEXT)
+@VGCONF_PLATFORMS_INCLUDE_AMD64_LINUX_TRUE@am__EXEEXT_2 = cachegrind-amd64-linux$(EXEEXT)
+@VGCONF_PLATFORMS_INCLUDE_PPC32_LINUX_TRUE@am__EXEEXT_3 = cachegrind-ppc32-linux$(EXEEXT)
+@VGCONF_PLATFORMS_INCLUDE_PPC64_LINUX_TRUE@am__EXEEXT_4 = cachegrind-ppc64-linux$(EXEEXT)
+@VGCONF_PLATFORMS_INCLUDE_PPC32_AIX5_TRUE@am__EXEEXT_5 = cachegrind-ppc32-aix5$(EXEEXT)
+@VGCONF_PLATFORMS_INCLUDE_PPC64_AIX5_TRUE@am__EXEEXT_6 = cachegrind-ppc64-aix5$(EXEEXT)
+PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
+am__objects_1 = cachegrind_amd64_linux-cg_main.$(OBJEXT)
+am__objects_2 = cachegrind_amd64_linux-cg-amd64.$(OBJEXT)
+am_cachegrind_amd64_linux_OBJECTS = $(am__objects_1) $(am__objects_2)
+cachegrind_amd64_linux_OBJECTS = $(am_cachegrind_amd64_linux_OBJECTS)
+am__DEPENDENCIES_1 =
+am__DEPENDENCIES_2 = $(COREGRIND_LIBS_AMD64_LINUX) \
+	$(am__DEPENDENCIES_1)
+cachegrind_amd64_linux_LINK = $(CCLD) $(cachegrind_amd64_linux_CFLAGS) \
+	$(CFLAGS) $(cachegrind_amd64_linux_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_3 = cachegrind_ppc32_aix5-cg_main.$(OBJEXT)
+am__objects_4 = cachegrind_ppc32_aix5-cg-ppc32.$(OBJEXT)
+am_cachegrind_ppc32_aix5_OBJECTS = $(am__objects_3) $(am__objects_4)
+cachegrind_ppc32_aix5_OBJECTS = $(am_cachegrind_ppc32_aix5_OBJECTS)
+am__DEPENDENCIES_3 = $(COREGRIND_LIBS_PPC32_AIX5) \
+	$(am__DEPENDENCIES_1)
+cachegrind_ppc32_aix5_LINK = $(CCLD) $(cachegrind_ppc32_aix5_CFLAGS) \
+	$(CFLAGS) $(cachegrind_ppc32_aix5_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_5 = cachegrind_ppc32_linux-cg_main.$(OBJEXT)
+am__objects_6 = cachegrind_ppc32_linux-cg-ppc32.$(OBJEXT)
+am_cachegrind_ppc32_linux_OBJECTS = $(am__objects_5) $(am__objects_6)
+cachegrind_ppc32_linux_OBJECTS = $(am_cachegrind_ppc32_linux_OBJECTS)
+am__DEPENDENCIES_4 = $(COREGRIND_LIBS_PPC32_LINUX) \
+	$(am__DEPENDENCIES_1)
+cachegrind_ppc32_linux_LINK = $(CCLD) $(cachegrind_ppc32_linux_CFLAGS) \
+	$(CFLAGS) $(cachegrind_ppc32_linux_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_7 = cachegrind_ppc64_aix5-cg_main.$(OBJEXT)
+am__objects_8 = cachegrind_ppc64_aix5-cg-ppc64.$(OBJEXT)
+am_cachegrind_ppc64_aix5_OBJECTS = $(am__objects_7) $(am__objects_8)
+cachegrind_ppc64_aix5_OBJECTS = $(am_cachegrind_ppc64_aix5_OBJECTS)
+am__DEPENDENCIES_5 = $(COREGRIND_LIBS_PPC64_AIX5) \
+	$(am__DEPENDENCIES_1)
+cachegrind_ppc64_aix5_LINK = $(CCLD) $(cachegrind_ppc64_aix5_CFLAGS) \
+	$(CFLAGS) $(cachegrind_ppc64_aix5_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_9 = cachegrind_ppc64_linux-cg_main.$(OBJEXT)
+am__objects_10 = cachegrind_ppc64_linux-cg-ppc64.$(OBJEXT)
+am_cachegrind_ppc64_linux_OBJECTS = $(am__objects_9) $(am__objects_10)
+cachegrind_ppc64_linux_OBJECTS = $(am_cachegrind_ppc64_linux_OBJECTS)
+am__DEPENDENCIES_6 = $(COREGRIND_LIBS_PPC64_LINUX) \
+	$(am__DEPENDENCIES_1)
+cachegrind_ppc64_linux_LINK = $(CCLD) $(cachegrind_ppc64_linux_CFLAGS) \
+	$(CFLAGS) $(cachegrind_ppc64_linux_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_11 = cachegrind_x86_linux-cg_main.$(OBJEXT)
+am__objects_12 = cachegrind_x86_linux-cg-x86.$(OBJEXT)
+am_cachegrind_x86_linux_OBJECTS = $(am__objects_11) $(am__objects_12)
+cachegrind_x86_linux_OBJECTS = $(am_cachegrind_x86_linux_OBJECTS)
+am__DEPENDENCIES_7 = $(COREGRIND_LIBS_X86_LINUX) $(am__DEPENDENCIES_1)
+cachegrind_x86_linux_LINK = $(CCLD) $(cachegrind_x86_linux_CFLAGS) \
+	$(CFLAGS) $(cachegrind_x86_linux_LDFLAGS) $(LDFLAGS) -o $@
+am_cg_merge_OBJECTS = cg_merge-cg_merge.$(OBJEXT)
+cg_merge_OBJECTS = $(am_cg_merge_OBJECTS)
+cg_merge_LDADD = $(LDADD)
+cg_merge_LINK = $(CCLD) $(cg_merge_CFLAGS) $(CFLAGS) \
+	$(cg_merge_LDFLAGS) $(LDFLAGS) -o $@
+binSCRIPT_INSTALL = $(INSTALL_SCRIPT)
+SCRIPTS = $(bin_SCRIPTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+SOURCES = $(cachegrind_amd64_linux_SOURCES) \
+	$(cachegrind_ppc32_aix5_SOURCES) \
+	$(cachegrind_ppc32_linux_SOURCES) \
+	$(cachegrind_ppc64_aix5_SOURCES) \
+	$(cachegrind_ppc64_linux_SOURCES) \
+	$(cachegrind_x86_linux_SOURCES) $(cg_merge_SOURCES)
+DIST_SOURCES = $(cachegrind_amd64_linux_SOURCES) \
+	$(cachegrind_ppc32_aix5_SOURCES) \
+	$(cachegrind_ppc32_linux_SOURCES) \
+	$(cachegrind_ppc64_aix5_SOURCES) \
+	$(cachegrind_ppc64_linux_SOURCES) \
+	$(cachegrind_x86_linux_SOURCES) $(cg_merge_SOURCES)
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+	html-recursive info-recursive install-data-recursive \
+	install-dvi-recursive install-exec-recursive \
+	install-html-recursive install-info-recursive \
+	install-pdf-recursive install-ps-recursive install-recursive \
+	installcheck-recursive installdirs-recursive pdf-recursive \
+	ps-recursive uninstall-recursive
+HEADERS = $(noinst_HEADERS)
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CFLAGS = @BOOST_CFLAGS@
+BOOST_LIBS = @BOOST_LIBS@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFAULT_SUPP = @DEFAULT_SUPP@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DIFF = @DIFF@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FLAG_FNO_STACK_PROTECTOR = @FLAG_FNO_STACK_PROTECTOR@
+FLAG_M32 = @FLAG_M32@
+FLAG_M64 = @FLAG_M64@
+FLAG_MAIX32 = @FLAG_MAIX32@
+FLAG_MAIX64 = @FLAG_MAIX64@
+FLAG_MMMX = @FLAG_MMMX@
+FLAG_MSSE = @FLAG_MSSE@
+FLAG_UNLIMITED_INLINE_UNIT_GROWTH = @FLAG_UNLIMITED_INLINE_UNIT_GROWTH@
+FLAG_WDECL_AFTER_STMT = @FLAG_WDECL_AFTER_STMT@
+FLAG_W_EXTRA = @FLAG_W_EXTRA@
+FLAG_W_NO_FORMAT_ZERO_LENGTH = @FLAG_W_NO_FORMAT_ZERO_LENGTH@
+GDB = @GDB@
+GLIBC_VERSION = @GLIBC_VERSION@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPI_CC = @MPI_CC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+PKG_CONFIG = @PKG_CONFIG@
+PREFERRED_STACK_BOUNDARY = @PREFERRED_STACK_BOUNDARY@
+QTCORE_CFLAGS = @QTCORE_CFLAGS@
+QTCORE_LIBS = @QTCORE_LIBS@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VALT_LOAD_ADDRESS = @VALT_LOAD_ADDRESS@
+VERSION = @VERSION@
+VEX_DIR = @VEX_DIR@
+VGCONF_ARCH_PRI = @VGCONF_ARCH_PRI@
+VGCONF_OS = @VGCONF_OS@
+VGCONF_PLATFORM_PRI_CAPS = @VGCONF_PLATFORM_PRI_CAPS@
+VGCONF_PLATFORM_SEC_CAPS = @VGCONF_PLATFORM_SEC_CAPS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = . tests docs
+valdir = $(libdir)/valgrind
+inplacedir = $(top_builddir)/.in_place
+
+# Baseline flags for all compilations.  Aim here is to maximise
+# performance and get whatever useful warnings we can out of gcc.
+AM_CFLAGS_BASE = -O2 -g -Wmissing-prototypes -Wall -Wshadow \
+                 -Wpointer-arith -Wstrict-prototypes -Wmissing-declarations \
+		 @FLAG_W_NO_FORMAT_ZERO_LENGTH@ \
+                 -fno-strict-aliasing
+
+
+# These flags are used for building the preload shared objects.
+# The aim is to give reasonable performance but also to have good
+# stack traces, since users often see stack traces extending 
+# into (and through) the preloads.
+AM_CFLAGS_PIC = -O -g -fpic -fno-omit-frame-pointer -fno-strict-aliasing
+
+# Flags for specific targets.
+#
+# Nb: the AM_CPPFLAGS_* values are suitable for building tools and auxprogs.
+# For building the core, coregrind/Makefile.am files add some extra things.
+#
+# Also: in newer versions of automake (1.10 onwards?) asm files ending with
+# '.S' are considered "pre-processed" (as opposed to those ending in '.s')
+# and so the CPPFLAGS are passed to the assembler.  But this is not true for
+# older automakes (e.g. 1.8.5, 1.9.6), sigh.  So we include
+# AM_CPPFLAGS_<PLATFORM> in each AM_CCASFLAGS_<PLATFORM> variable.  This
+# means some of the flags are duplicated on systems with newer versions of
+# automake, but this does not really matter and seems hard to avoid.
+AM_CPPFLAGS_COMMON = \
+		-I$(top_srcdir) \
+		-I$(top_srcdir)/include \
+		-I@VEX_DIR@/pub
+
+AM_FLAG_M3264_X86_LINUX = @FLAG_M32@
+AM_CPPFLAGS_X86_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_x86=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_x86_linux=1
+
+AM_CFLAGS_X86_LINUX = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY@ \
+			 	$(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_X86_LINUX = $(AM_CPPFLAGS_X86_LINUX) @FLAG_M32@ -g
+AM_FLAG_M3264_AMD64_LINUX = @FLAG_M64@
+AM_CPPFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_amd64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_amd64_linux=1
+
+AM_CFLAGS_AMD64_LINUX = @FLAG_M64@ -fomit-frame-pointer \
+				@PREFERRED_STACK_BOUNDARY@ $(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_AMD64_LINUX) @FLAG_M64@ -g
+AM_FLAG_M3264_PPC32_LINUX = @FLAG_M32@
+AM_CPPFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_COMMON) \
+		-DVGA_ppc32=1 \
+		-DVGO_linux=1 \
+		-DVGP_ppc32_linux=1
+
+AM_CFLAGS_PPC32_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_PPC32_LINUX) @FLAG_M32@ -g
+AM_FLAG_M3264_PPC64_LINUX = @FLAG_M64@
+AM_CPPFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_ppc64_linux=1
+
+AM_CFLAGS_PPC64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_PPC64_LINUX) @FLAG_M64@ -g
+AM_FLAG_M3264_PPC32_AIX5 = @FLAG_MAIX32@
+AM_CPPFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc32=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc32_aix5=1
+
+AM_CFLAGS_PPC32_AIX5 = @FLAG_MAIX32@ -mcpu=powerpc $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_PPC32_AIX5) \
+			    @FLAG_MAIX32@ -mcpu=powerpc -g
+
+AM_FLAG_M3264_PPC64_AIX5 = @FLAG_MAIX64@
+AM_CPPFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc64_aix5=1
+
+AM_CFLAGS_PPC64_AIX5 = @FLAG_MAIX64@ -mcpu=powerpc64 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_PPC64_AIX5) \
+			    @FLAG_MAIX64@ -mcpu=powerpc64 -g
+
+
+# Flags for the primary target.  These must be used to build the
+# regtests and performance tests.  In fact, these must be used to
+# build anything which is built only once on a dual-arch build.
+#
+AM_FLAG_M3264_PRI = $(AM_FLAG_M3264_@VGCONF_PLATFORM_PRI_CAPS@)
+AM_CPPFLAGS_PRI = $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+AM_CFLAGS_PRI = $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+AM_CCASFLAGS_PRI = $(AM_CCASFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+@VGCONF_HAVE_PLATFORM_SEC_CAPS_FALSE@AM_FLAG_M3264_SEC = 
+@VGCONF_HAVE_PLATFORM_SEC_CAPS_TRUE@AM_FLAG_M3264_SEC = $(AM_FLAG_M3264_@VGCONF_PLATFORM_SEC_CAPS@)
+
+# Baseline link flags for making dynamic shared objects.
+#
+PRELOAD_LDFLAGS_COMMON_LINUX = -nodefaultlibs -shared -Wl,-z,interpose,-z,initfirst
+PRELOAD_LDFLAGS_COMMON_AIX5 = -nodefaultlibs -shared -Wl,-G -Wl,-bnogc
+PRELOAD_LDFLAGS_X86_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
+PRELOAD_LDFLAGS_AMD64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
+PRELOAD_LDFLAGS_PPC32_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
+PRELOAD_LDFLAGS_PPC64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
+PRELOAD_LDFLAGS_PPC32_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  @FLAG_MAIX32@
+PRELOAD_LDFLAGS_PPC64_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  @FLAG_MAIX64@
+LIBREPLACEMALLOC_X86_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-x86-linux.a
+
+LIBREPLACEMALLOC_AMD64_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-amd64-linux.a
+
+LIBREPLACEMALLOC_PPC32_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc32-linux.a
+
+LIBREPLACEMALLOC_PPC64_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc64-linux.a
+
+LIBREPLACEMALLOC_PPC32_AIX5 = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc32-aix5.a
+
+LIBREPLACEMALLOC_PPC64_AIX5 = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc64-aix5.a
+
+COREGRIND_LIBS_X86_LINUX = \
+	$(top_builddir)/coregrind/libcoregrind-x86-linux.a \
+	@VEX_DIR@/libvex-x86-linux.a
+
+COREGRIND_LIBS_AMD64_LINUX = \
+	$(top_builddir)/coregrind/libcoregrind-amd64-linux.a \
+	@VEX_DIR@/libvex-amd64-linux.a
+
+COREGRIND_LIBS_PPC32_LINUX = \
+	$(top_builddir)/coregrind/libcoregrind-ppc32-linux.a \
+	@VEX_DIR@/libvex-ppc32-linux.a
+
+COREGRIND_LIBS_PPC64_LINUX = \
+	$(top_builddir)/coregrind/libcoregrind-ppc64-linux.a \
+	@VEX_DIR@/libvex-ppc64-linux.a
+
+COREGRIND_LIBS_PPC32_AIX5 = \
+	$(top_builddir)/coregrind/libcoregrind-ppc32-aix5.a \
+	@VEX_DIR@/libvex-ppc32-aix5.a
+
+COREGRIND_LIBS_PPC64_AIX5 = \
+	$(top_builddir)/coregrind/libcoregrind-ppc64-aix5.a \
+	@VEX_DIR@/libvex-ppc64-aix5.a
+
+TOOL_LDADD_COMMON = -lgcc
+TOOL_LDFLAGS_COMMON_LINUX = -static \
+	-Wl,-defsym,valt_load_address=@VALT_LOAD_ADDRESS@ \
+	-nodefaultlibs -nostartfiles -u _start
+
+TOOL_LDFLAGS_COMMON_AIX5 = -static -Wl,-e_start_valgrind
+TOOL_LDADD_X86_LINUX = $(COREGRIND_LIBS_X86_LINUX) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_X86_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M32@ \
+	-Wl,-T,$(top_builddir)/valt_load_address_x86_linux.lds
+
+TOOL_LDADD_AMD64_LINUX = $(COREGRIND_LIBS_AMD64_LINUX) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_AMD64_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M64@ \
+	-Wl,-T,$(top_builddir)/valt_load_address_amd64_linux.lds
+
+TOOL_LDADD_PPC32_LINUX = $(COREGRIND_LIBS_PPC32_LINUX) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_PPC32_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M32@ \
+	-Wl,-T,$(top_builddir)/valt_load_address_ppc32_linux.lds
+
+TOOL_LDADD_PPC64_LINUX = $(COREGRIND_LIBS_PPC64_LINUX) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_PPC64_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M64@ \
+	-Wl,-T,$(top_builddir)/valt_load_address_ppc64_linux.lds
+
+TOOL_LDADD_PPC32_AIX5 = $(COREGRIND_LIBS_PPC32_AIX5) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_PPC32_AIX5 = \
+	$(TOOL_LDFLAGS_COMMON_AIX5) @FLAG_MAIX32@
+
+TOOL_LDADD_PPC64_AIX5 = $(COREGRIND_LIBS_PPC64_AIX5) $(TOOL_LDADD_COMMON)
+TOOL_LDFLAGS_PPC64_AIX5 = \
+	$(TOOL_LDFLAGS_COMMON_AIX5) @FLAG_MAIX64@ -Wl,-bbigtoc
+
+LIBREPLACEMALLOC_LDFLAGS_X86_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_X86_LINUX) \
+	-Wl,--no-whole-archive
+
+LIBREPLACEMALLOC_LDFLAGS_AMD64_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_AMD64_LINUX) \
+	-Wl,--no-whole-archive
+
+LIBREPLACEMALLOC_LDFLAGS_PPC32_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_PPC32_LINUX) \
+	-Wl,--no-whole-archive
+
+LIBREPLACEMALLOC_LDFLAGS_PPC64_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_PPC64_LINUX) \
+	-Wl,--no-whole-archive
+
+LIBREPLACEMALLOC_LDFLAGS_PPC32_AIX5 = \
+	$(LIBREPLACEMALLOC_PPC32_AIX5)
+
+LIBREPLACEMALLOC_LDFLAGS_PPC64_AIX5 = \
+	$(LIBREPLACEMALLOC_PPC64_AIX5)
+
+bin_SCRIPTS = cg_annotate
+noinst_HEADERS = cg_arch.h cg_sim.c cg_branchpred.c
+cg_merge_SOURCES = cg_merge.c
+cg_merge_CPPFLAGS = $(AM_CPPFLAGS_PRI)
+cg_merge_CFLAGS = $(AM_CFLAGS_PRI)
+cg_merge_CCASFLAGS = $(AM_CCASFLAGS_PRI)
+cg_merge_LDFLAGS = $(AM_CFLAGS_PRI)
+CACHEGRIND_SOURCES_COMMON = cg_main.c
+CACHEGRIND_SOURCES_X86 = cg-x86.c
+CACHEGRIND_SOURCES_AMD64 = cg-amd64.c
+CACHEGRIND_SOURCES_PPC32 = cg-ppc32.c
+CACHEGRIND_SOURCES_PPC64 = cg-ppc64.c
+cachegrind_x86_linux_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_X86)
+cachegrind_x86_linux_CPPFLAGS = $(AM_CPPFLAGS_X86_LINUX)
+cachegrind_x86_linux_CFLAGS = $(AM_CFLAGS_X86_LINUX)
+cachegrind_x86_linux_DEPENDENCIES = $(COREGRIND_LIBS_X86_LINUX)
+cachegrind_x86_linux_LDADD = $(TOOL_LDADD_X86_LINUX)
+cachegrind_x86_linux_LDFLAGS = $(TOOL_LDFLAGS_X86_LINUX)
+cachegrind_amd64_linux_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_AMD64)
+cachegrind_amd64_linux_CPPFLAGS = $(AM_CPPFLAGS_AMD64_LINUX)
+cachegrind_amd64_linux_CFLAGS = $(AM_CFLAGS_AMD64_LINUX)
+cachegrind_amd64_linux_DEPENDENCIES = $(COREGRIND_LIBS_AMD64_LINUX)
+cachegrind_amd64_linux_LDADD = $(TOOL_LDADD_AMD64_LINUX)
+cachegrind_amd64_linux_LDFLAGS = $(TOOL_LDFLAGS_AMD64_LINUX)
+cachegrind_ppc32_linux_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC32)
+cachegrind_ppc32_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC32_LINUX)
+cachegrind_ppc32_linux_CFLAGS = $(AM_CFLAGS_PPC32_LINUX)
+cachegrind_ppc32_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_LINUX)
+cachegrind_ppc32_linux_LDADD = $(TOOL_LDADD_PPC32_LINUX)
+cachegrind_ppc32_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC32_LINUX)
+cachegrind_ppc64_linux_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC64)
+cachegrind_ppc64_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC64_LINUX)
+cachegrind_ppc64_linux_CFLAGS = $(AM_CFLAGS_PPC64_LINUX)
+cachegrind_ppc64_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_LINUX)
+cachegrind_ppc64_linux_LDADD = $(TOOL_LDADD_PPC64_LINUX)
+cachegrind_ppc64_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC64_LINUX)
+cachegrind_ppc32_aix5_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC32)
+cachegrind_ppc32_aix5_CPPFLAGS = $(AM_CPPFLAGS_PPC32_AIX5)
+cachegrind_ppc32_aix5_CFLAGS = $(AM_CFLAGS_PPC32_AIX5)
+cachegrind_ppc32_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_AIX5)
+cachegrind_ppc32_aix5_LDADD = $(TOOL_LDADD_PPC32_AIX5)
+cachegrind_ppc32_aix5_LDFLAGS = $(TOOL_LDFLAGS_PPC32_AIX5)
+cachegrind_ppc64_aix5_SOURCES = $(CACHEGRIND_SOURCES_COMMON) $(CACHEGRIND_SOURCES_PPC64)
+cachegrind_ppc64_aix5_CPPFLAGS = $(AM_CPPFLAGS_PPC64_AIX5)
+cachegrind_ppc64_aix5_CFLAGS = $(AM_CFLAGS_PPC64_AIX5)
+cachegrind_ppc64_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_AIX5)
+cachegrind_ppc64_aix5_LDADD = $(TOOL_LDADD_PPC64_AIX5)
+cachegrind_ppc64_aix5_LDFLAGS = $(TOOL_LDFLAGS_PPC64_AIX5)
+all: all-recursive
+
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/Makefile.tool.am $(top_srcdir)/Makefile.all.am $(top_srcdir)/Makefile.flags.am $(top_srcdir)/Makefile.core-tool.am $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  cachegrind/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  cachegrind/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+cg_annotate: $(top_builddir)/config.status $(srcdir)/cg_annotate.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+install-binPROGRAMS: $(bin_PROGRAMS)
+	@$(NORMAL_INSTALL)
+	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+	  if test -f $$p \
+	  ; then \
+	    f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
+	   echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
+	   $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
+	  else :; fi; \
+	done
+
+uninstall-binPROGRAMS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_PROGRAMS)'; for p in $$list; do \
+	  f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
+	  echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(bindir)/$$f"; \
+	done
+
+clean-binPROGRAMS:
+	-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+
+clean-noinstPROGRAMS:
+	-test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS)
+cachegrind-amd64-linux$(EXEEXT): $(cachegrind_amd64_linux_OBJECTS) $(cachegrind_amd64_linux_DEPENDENCIES) 
+	@rm -f cachegrind-amd64-linux$(EXEEXT)
+	$(cachegrind_amd64_linux_LINK) $(cachegrind_amd64_linux_OBJECTS) $(cachegrind_amd64_linux_LDADD) $(LIBS)
+cachegrind-ppc32-aix5$(EXEEXT): $(cachegrind_ppc32_aix5_OBJECTS) $(cachegrind_ppc32_aix5_DEPENDENCIES) 
+	@rm -f cachegrind-ppc32-aix5$(EXEEXT)
+	$(cachegrind_ppc32_aix5_LINK) $(cachegrind_ppc32_aix5_OBJECTS) $(cachegrind_ppc32_aix5_LDADD) $(LIBS)
+cachegrind-ppc32-linux$(EXEEXT): $(cachegrind_ppc32_linux_OBJECTS) $(cachegrind_ppc32_linux_DEPENDENCIES) 
+	@rm -f cachegrind-ppc32-linux$(EXEEXT)
+	$(cachegrind_ppc32_linux_LINK) $(cachegrind_ppc32_linux_OBJECTS) $(cachegrind_ppc32_linux_LDADD) $(LIBS)
+cachegrind-ppc64-aix5$(EXEEXT): $(cachegrind_ppc64_aix5_OBJECTS) $(cachegrind_ppc64_aix5_DEPENDENCIES) 
+	@rm -f cachegrind-ppc64-aix5$(EXEEXT)
+	$(cachegrind_ppc64_aix5_LINK) $(cachegrind_ppc64_aix5_OBJECTS) $(cachegrind_ppc64_aix5_LDADD) $(LIBS)
+cachegrind-ppc64-linux$(EXEEXT): $(cachegrind_ppc64_linux_OBJECTS) $(cachegrind_ppc64_linux_DEPENDENCIES) 
+	@rm -f cachegrind-ppc64-linux$(EXEEXT)
+	$(cachegrind_ppc64_linux_LINK) $(cachegrind_ppc64_linux_OBJECTS) $(cachegrind_ppc64_linux_LDADD) $(LIBS)
+cachegrind-x86-linux$(EXEEXT): $(cachegrind_x86_linux_OBJECTS) $(cachegrind_x86_linux_DEPENDENCIES) 
+	@rm -f cachegrind-x86-linux$(EXEEXT)
+	$(cachegrind_x86_linux_LINK) $(cachegrind_x86_linux_OBJECTS) $(cachegrind_x86_linux_LDADD) $(LIBS)
+cg_merge$(EXEEXT): $(cg_merge_OBJECTS) $(cg_merge_DEPENDENCIES) 
+	@rm -f cg_merge$(EXEEXT)
+	$(cg_merge_LINK) $(cg_merge_OBJECTS) $(cg_merge_LDADD) $(LIBS)
+install-binSCRIPTS: $(bin_SCRIPTS)
+	@$(NORMAL_INSTALL)
+	test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+	@list='$(bin_SCRIPTS)'; for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  if test -f $$d$$p; then \
+	    f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
+	    echo " $(binSCRIPT_INSTALL) '$$d$$p' '$(DESTDIR)$(bindir)/$$f'"; \
+	    $(binSCRIPT_INSTALL) "$$d$$p" "$(DESTDIR)$(bindir)/$$f"; \
+	  else :; fi; \
+	done
+
+uninstall-binSCRIPTS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(bin_SCRIPTS)'; for p in $$list; do \
+	  f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
+	  echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(bindir)/$$f"; \
+	done
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_amd64_linux-cg_main.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_ppc32_linux-cg_main.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_ppc64_linux-cg_main.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_x86_linux-cg-x86.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachegrind_x86_linux-cg_main.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cg_merge-cg_merge.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+cachegrind_amd64_linux-cg_main.o: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_amd64_linux-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_amd64_linux-cg_main.Tpo -c -o cachegrind_amd64_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_amd64_linux-cg_main.Tpo $(DEPDIR)/cachegrind_amd64_linux-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_amd64_linux-cg_main.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_amd64_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_amd64_linux-cg_main.obj: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_amd64_linux-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_amd64_linux-cg_main.Tpo -c -o cachegrind_amd64_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_amd64_linux-cg_main.Tpo $(DEPDIR)/cachegrind_amd64_linux-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_amd64_linux-cg_main.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_amd64_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_amd64_linux-cg-amd64.o: cg-amd64.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_amd64_linux-cg-amd64.o -MD -MP -MF $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Tpo -c -o cachegrind_amd64_linux-cg-amd64.o `test -f 'cg-amd64.c' || echo '$(srcdir)/'`cg-amd64.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Tpo $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-amd64.c' object='cachegrind_amd64_linux-cg-amd64.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_amd64_linux-cg-amd64.o `test -f 'cg-amd64.c' || echo '$(srcdir)/'`cg-amd64.c
+
+cachegrind_amd64_linux-cg-amd64.obj: cg-amd64.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_amd64_linux-cg-amd64.obj -MD -MP -MF $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Tpo -c -o cachegrind_amd64_linux-cg-amd64.obj `if test -f 'cg-amd64.c'; then $(CYGPATH_W) 'cg-amd64.c'; else $(CYGPATH_W) '$(srcdir)/cg-amd64.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Tpo $(DEPDIR)/cachegrind_amd64_linux-cg-amd64.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-amd64.c' object='cachegrind_amd64_linux-cg-amd64.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_amd64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_amd64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_amd64_linux-cg-amd64.obj `if test -f 'cg-amd64.c'; then $(CYGPATH_W) 'cg-amd64.c'; else $(CYGPATH_W) '$(srcdir)/cg-amd64.c'; fi`
+
+cachegrind_ppc32_aix5-cg_main.o: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_aix5-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Tpo -c -o cachegrind_ppc32_aix5-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Tpo $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_ppc32_aix5-cg_main.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_aix5-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_ppc32_aix5-cg_main.obj: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_aix5-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Tpo -c -o cachegrind_ppc32_aix5-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Tpo $(DEPDIR)/cachegrind_ppc32_aix5-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_ppc32_aix5-cg_main.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_aix5-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_ppc32_aix5-cg-ppc32.o: cg-ppc32.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_aix5-cg-ppc32.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Tpo -c -o cachegrind_ppc32_aix5-cg-ppc32.o `test -f 'cg-ppc32.c' || echo '$(srcdir)/'`cg-ppc32.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Tpo $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-ppc32.c' object='cachegrind_ppc32_aix5-cg-ppc32.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_aix5-cg-ppc32.o `test -f 'cg-ppc32.c' || echo '$(srcdir)/'`cg-ppc32.c
+
+cachegrind_ppc32_aix5-cg-ppc32.obj: cg-ppc32.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_aix5-cg-ppc32.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Tpo -c -o cachegrind_ppc32_aix5-cg-ppc32.obj `if test -f 'cg-ppc32.c'; then $(CYGPATH_W) 'cg-ppc32.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc32.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Tpo $(DEPDIR)/cachegrind_ppc32_aix5-cg-ppc32.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-ppc32.c' object='cachegrind_ppc32_aix5-cg-ppc32.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_aix5-cg-ppc32.obj `if test -f 'cg-ppc32.c'; then $(CYGPATH_W) 'cg-ppc32.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc32.c'; fi`
+
+cachegrind_ppc32_linux-cg_main.o: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_linux-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Tpo -c -o cachegrind_ppc32_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Tpo $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_ppc32_linux-cg_main.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_ppc32_linux-cg_main.obj: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_linux-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Tpo -c -o cachegrind_ppc32_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Tpo $(DEPDIR)/cachegrind_ppc32_linux-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_ppc32_linux-cg_main.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_ppc32_linux-cg-ppc32.o: cg-ppc32.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_linux-cg-ppc32.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Tpo -c -o cachegrind_ppc32_linux-cg-ppc32.o `test -f 'cg-ppc32.c' || echo '$(srcdir)/'`cg-ppc32.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Tpo $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-ppc32.c' object='cachegrind_ppc32_linux-cg-ppc32.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_linux-cg-ppc32.o `test -f 'cg-ppc32.c' || echo '$(srcdir)/'`cg-ppc32.c
+
+cachegrind_ppc32_linux-cg-ppc32.obj: cg-ppc32.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc32_linux-cg-ppc32.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Tpo -c -o cachegrind_ppc32_linux-cg-ppc32.obj `if test -f 'cg-ppc32.c'; then $(CYGPATH_W) 'cg-ppc32.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc32.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Tpo $(DEPDIR)/cachegrind_ppc32_linux-cg-ppc32.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-ppc32.c' object='cachegrind_ppc32_linux-cg-ppc32.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc32_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc32_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc32_linux-cg-ppc32.obj `if test -f 'cg-ppc32.c'; then $(CYGPATH_W) 'cg-ppc32.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc32.c'; fi`
+
+cachegrind_ppc64_aix5-cg_main.o: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_aix5-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Tpo -c -o cachegrind_ppc64_aix5-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Tpo $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_ppc64_aix5-cg_main.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_aix5-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_ppc64_aix5-cg_main.obj: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_aix5-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Tpo -c -o cachegrind_ppc64_aix5-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Tpo $(DEPDIR)/cachegrind_ppc64_aix5-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_ppc64_aix5-cg_main.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_aix5-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_ppc64_aix5-cg-ppc64.o: cg-ppc64.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_aix5-cg-ppc64.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Tpo -c -o cachegrind_ppc64_aix5-cg-ppc64.o `test -f 'cg-ppc64.c' || echo '$(srcdir)/'`cg-ppc64.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Tpo $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-ppc64.c' object='cachegrind_ppc64_aix5-cg-ppc64.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_aix5-cg-ppc64.o `test -f 'cg-ppc64.c' || echo '$(srcdir)/'`cg-ppc64.c
+
+cachegrind_ppc64_aix5-cg-ppc64.obj: cg-ppc64.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_aix5-cg-ppc64.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Tpo -c -o cachegrind_ppc64_aix5-cg-ppc64.obj `if test -f 'cg-ppc64.c'; then $(CYGPATH_W) 'cg-ppc64.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc64.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Tpo $(DEPDIR)/cachegrind_ppc64_aix5-cg-ppc64.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-ppc64.c' object='cachegrind_ppc64_aix5-cg-ppc64.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_aix5_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_aix5_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_aix5-cg-ppc64.obj `if test -f 'cg-ppc64.c'; then $(CYGPATH_W) 'cg-ppc64.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc64.c'; fi`
+
+cachegrind_ppc64_linux-cg_main.o: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_linux-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Tpo -c -o cachegrind_ppc64_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Tpo $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_ppc64_linux-cg_main.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+cachegrind_ppc64_linux-cg_main.obj: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_linux-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Tpo -c -o cachegrind_ppc64_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Tpo $(DEPDIR)/cachegrind_ppc64_linux-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_ppc64_linux-cg_main.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+cachegrind_ppc64_linux-cg-ppc64.o: cg-ppc64.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_linux-cg-ppc64.o -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Tpo -c -o cachegrind_ppc64_linux-cg-ppc64.o `test -f 'cg-ppc64.c' || echo '$(srcdir)/'`cg-ppc64.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Tpo $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-ppc64.c' object='cachegrind_ppc64_linux-cg-ppc64.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_linux-cg-ppc64.o `test -f 'cg-ppc64.c' || echo '$(srcdir)/'`cg-ppc64.c
+
+cachegrind_ppc64_linux-cg-ppc64.obj: cg-ppc64.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -MT cachegrind_ppc64_linux-cg-ppc64.obj -MD -MP -MF $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Tpo -c -o cachegrind_ppc64_linux-cg-ppc64.obj `if test -f 'cg-ppc64.c'; then $(CYGPATH_W) 'cg-ppc64.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc64.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Tpo $(DEPDIR)/cachegrind_ppc64_linux-cg-ppc64.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-ppc64.c' object='cachegrind_ppc64_linux-cg-ppc64.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_ppc64_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_ppc64_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_ppc64_linux-cg-ppc64.obj `if test -f 'cg-ppc64.c'; then $(CYGPATH_W) 'cg-ppc64.c'; else $(CYGPATH_W) '$(srcdir)/cg-ppc64.c'; fi`
+
+# x86-linux object for cg_main.c, compiled with the cachegrind_x86_linux
+# CPPFLAGS/CFLAGS.  The source is taken from the build directory if present,
+# otherwise from $(srcdir).  Fast-dep mode writes dependencies to a .Tpo file
+# that is renamed to .Po after a successful compile; the am__fastdepCC_FALSE
+# lines are the $(depcomp)-based fallback.
+cachegrind_x86_linux-cg_main.o: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -MT cachegrind_x86_linux-cg_main.o -MD -MP -MF $(DEPDIR)/cachegrind_x86_linux-cg_main.Tpo -c -o cachegrind_x86_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_x86_linux-cg_main.Tpo $(DEPDIR)/cachegrind_x86_linux-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_x86_linux-cg_main.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_x86_linux-cg_main.o `test -f 'cg_main.c' || echo '$(srcdir)/'`cg_main.c
+
+# Same compile as the .o rule for cg_main.c (x86-linux flags), but producing
+# an .obj-suffixed object and passing the source path through $(CYGPATH_W).
+# Dependency output goes to a .Tpo file renamed to .Po on success, or through
+# $(depcomp) in the am__fastdepCC_FALSE fallback.
+cachegrind_x86_linux-cg_main.obj: cg_main.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -MT cachegrind_x86_linux-cg_main.obj -MD -MP -MF $(DEPDIR)/cachegrind_x86_linux-cg_main.Tpo -c -o cachegrind_x86_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_x86_linux-cg_main.Tpo $(DEPDIR)/cachegrind_x86_linux-cg_main.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_main.c' object='cachegrind_x86_linux-cg_main.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_x86_linux-cg_main.obj `if test -f 'cg_main.c'; then $(CYGPATH_W) 'cg_main.c'; else $(CYGPATH_W) '$(srcdir)/cg_main.c'; fi`
+
+# x86-linux object for cg-x86.c, compiled with the cachegrind_x86_linux
+# CPPFLAGS/CFLAGS.  The source is taken from the build directory if present,
+# otherwise from $(srcdir).  Fast-dep mode writes dependencies to a .Tpo file
+# that is renamed to .Po after a successful compile; the am__fastdepCC_FALSE
+# lines are the $(depcomp)-based fallback.
+cachegrind_x86_linux-cg-x86.o: cg-x86.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -MT cachegrind_x86_linux-cg-x86.o -MD -MP -MF $(DEPDIR)/cachegrind_x86_linux-cg-x86.Tpo -c -o cachegrind_x86_linux-cg-x86.o `test -f 'cg-x86.c' || echo '$(srcdir)/'`cg-x86.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_x86_linux-cg-x86.Tpo $(DEPDIR)/cachegrind_x86_linux-cg-x86.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-x86.c' object='cachegrind_x86_linux-cg-x86.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_x86_linux-cg-x86.o `test -f 'cg-x86.c' || echo '$(srcdir)/'`cg-x86.c
+
+# Same compile as the .o rule for cg-x86.c (x86-linux flags), but producing
+# an .obj-suffixed object and passing the source path through $(CYGPATH_W).
+# Dependency output goes to a .Tpo file renamed to .Po on success, or through
+# $(depcomp) in the am__fastdepCC_FALSE fallback.
+cachegrind_x86_linux-cg-x86.obj: cg-x86.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -MT cachegrind_x86_linux-cg-x86.obj -MD -MP -MF $(DEPDIR)/cachegrind_x86_linux-cg-x86.Tpo -c -o cachegrind_x86_linux-cg-x86.obj `if test -f 'cg-x86.c'; then $(CYGPATH_W) 'cg-x86.c'; else $(CYGPATH_W) '$(srcdir)/cg-x86.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cachegrind_x86_linux-cg-x86.Tpo $(DEPDIR)/cachegrind_x86_linux-cg-x86.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg-x86.c' object='cachegrind_x86_linux-cg-x86.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cachegrind_x86_linux_CPPFLAGS) $(CPPFLAGS) $(cachegrind_x86_linux_CFLAGS) $(CFLAGS) -c -o cachegrind_x86_linux-cg-x86.obj `if test -f 'cg-x86.c'; then $(CYGPATH_W) 'cg-x86.c'; else $(CYGPATH_W) '$(srcdir)/cg-x86.c'; fi`
+
+# Object for the cg_merge program, compiled with the cg_merge-specific
+# CPPFLAGS/CFLAGS.  The source is taken from the build directory if present,
+# otherwise from $(srcdir).  Fast-dep mode writes dependencies to a .Tpo file
+# that is renamed to .Po after a successful compile; the am__fastdepCC_FALSE
+# lines are the $(depcomp)-based fallback.
+cg_merge-cg_merge.o: cg_merge.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cg_merge_CPPFLAGS) $(CPPFLAGS) $(cg_merge_CFLAGS) $(CFLAGS) -MT cg_merge-cg_merge.o -MD -MP -MF $(DEPDIR)/cg_merge-cg_merge.Tpo -c -o cg_merge-cg_merge.o `test -f 'cg_merge.c' || echo '$(srcdir)/'`cg_merge.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cg_merge-cg_merge.Tpo $(DEPDIR)/cg_merge-cg_merge.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_merge.c' object='cg_merge-cg_merge.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cg_merge_CPPFLAGS) $(CPPFLAGS) $(cg_merge_CFLAGS) $(CFLAGS) -c -o cg_merge-cg_merge.o `test -f 'cg_merge.c' || echo '$(srcdir)/'`cg_merge.c
+
+# Same compile as the .o rule for cg_merge.c (cg_merge flags), but producing
+# an .obj-suffixed object and passing the source path through $(CYGPATH_W).
+# Dependency output goes to a .Tpo file renamed to .Po on success, or through
+# $(depcomp) in the am__fastdepCC_FALSE fallback.
+cg_merge-cg_merge.obj: cg_merge.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cg_merge_CPPFLAGS) $(CPPFLAGS) $(cg_merge_CFLAGS) $(CFLAGS) -MT cg_merge-cg_merge.obj -MD -MP -MF $(DEPDIR)/cg_merge-cg_merge.Tpo -c -o cg_merge-cg_merge.obj `if test -f 'cg_merge.c'; then $(CYGPATH_W) 'cg_merge.c'; else $(CYGPATH_W) '$(srcdir)/cg_merge.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/cg_merge-cg_merge.Tpo $(DEPDIR)/cg_merge-cg_merge.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='cg_merge.c' object='cg_merge-cg_merge.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(cg_merge_CPPFLAGS) $(CPPFLAGS) $(cg_merge_CFLAGS) $(CFLAGS) -c -o cg_merge-cg_merge.obj `if test -f 'cg_merge.c'; then $(CYGPATH_W) 'cg_merge.c'; else $(CYGPATH_W) '$(srcdir)/cg_merge.c'; fi`
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+#     (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+#
+# Recipe outline:
+#  - MAKEFLAGS is scanned for a `k' flag; with it a failing sub-make only
+#    records fail=yes, without it the recipe stops at once with `exit 1'.
+#  - The real target name is the rule name with `-recursive' stripped.
+#  - Each entry of SUBDIRS is visited in order; for `.' the `<target>-am'
+#    target is run instead of `<target>'.
+#  - If `.' was not listed in SUBDIRS, `<target>-am' is run here at the end.
+#  - The final `test -z' makes the rule fail if any failure was recorded.
+$(RECURSIVE_TARGETS):
+	@failcom='exit 1'; \
+	for f in x $$MAKEFLAGS; do \
+	  case $$f in \
+	    *=* | --[!k]*);; \
+	    *k*) failcom='fail=yes';; \
+	  esac; \
+	done; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+# Recursive clean-style targets.  Differences from the ordinary recursive
+# rule: distclean-* and maintainer-clean-* walk DIST_SUBDIRS instead of
+# SUBDIRS, the subdirectory list is reversed, and `.' (this directory, via
+# `<target>-am') is always handled last.  As there, a `k' flag in MAKEFLAGS
+# turns a sub-make failure into fail=yes instead of an immediate `exit 1',
+# and the rule fails at the end if any failure was recorded.
+$(RECURSIVE_CLEAN_TARGETS):
+	@failcom='exit 1'; \
+	for f in x $$MAKEFLAGS; do \
+	  case $$f in \
+	    *=* | --[!k]*);; \
+	    *k*) failcom='fail=yes';; \
+	  esac; \
+	done; \
+	dot_seen=no; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	rev=''; for subdir in $$list; do \
+	  if test "$$subdir" = "."; then :; else \
+	    rev="$$subdir $$rev"; \
+	  fi; \
+	done; \
+	rev="$$rev ."; \
+	target=`echo $@ | sed s/-recursive//`; \
+	for subdir in $$rev; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done && test -z "$$fail"
+# Run `make tags' in every SUBDIRS entry except `.'.  The loop's exit status
+# is that of the last subdirectory processed.
+tags-recursive:
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+	done
+# Run `make ctags' in every SUBDIRS entry except `.'.  The loop's exit status
+# is that of the last subdirectory processed.
+ctags-recursive:
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+	done
+
+# Build the `ID' database with mkid.  Every listed file is resolved to the
+# build directory if it exists there, otherwise to $(srcdir); awk then drops
+# duplicate paths and prints the list only if at least one line was read.
+# (The flag set per input line must be spelled `nonempty', matching the test
+# in the END block, otherwise nothing is ever printed.)
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+# Build the etags index for this directory.
+#  - Probe whether ETAGS accepts --etags-include; if not, fall back to
+#    --include.
+#  - For every SUBDIRS entry other than `.' that already has a TAGS file,
+#    add an include option pointing at it (absolute path).
+#  - Resolve each source/header to the build directory or $(srcdir) and
+#    de-duplicate the list with awk.
+#  - Do nothing when ETAGS_ARGS, the include list and the file list are all
+#    empty.  Otherwise, if only the file list is empty, $empty_fix stands in
+#    for it: `.' in the --etags-include flavour, nothing in the other.
+TAGS: tags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+# Build the ctags index for this directory.  The source/header list is
+# resolved to the build directory or $(srcdir) and de-duplicated with awk;
+# $tags is initialised empty and never extended here, so no subdirectory
+# indexes are merged in.  CTAGS is not run when CTAGS_ARGS and the file list
+# are both empty.
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+# Run gtags from $(top_srcdir), handing it the absolute path of
+# $(top_builddir) as its final argument.
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+# Remove the tag/index files from this directory; the leading `-' makes make
+# ignore any failure of rm.
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Populate $(distdir) with this directory's distributable files, then recurse.
+#
+# First recipe:
+#  - strip a leading $(srcdir)/ from each DISTFILES name and rewrite a leading
+#    $(top_srcdir)/ to $(top_builddir)/ (the two *strip variables are the
+#    regex-escaped forms of those directories);
+#  - create under $(distdir) every subdirectory the resulting names need;
+#  - copy each entry, looking in the build directory first and in $(srcdir)
+#    otherwise.  Directories are copied recursively (the $(srcdir) version
+#    first when both exist); a plain file is copied only if it is not already
+#    present in $(distdir).
+# Second recipe: for every DIST_SUBDIRS entry other than `.', create the
+# matching directory under $(distdir) and run `make distdir' there with
+# top_distdir and distdir overridden by absolute paths and with
+# am__remove_distdir and am__skip_length_check set to `:'.
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+	list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test -d "$(distdir)/$$subdir" \
+	    || $(MKDIR_P) "$(distdir)/$$subdir" \
+	    || exit 1; \
+	    distdir=`$(am__cd) $(distdir) && pwd`; \
+	    top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
+	    (cd $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$top_distdir" \
+	        distdir="$$distdir/$$subdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+# Entry points with no recipe of their own: `check' and `installdirs' forward
+# to their -recursive variants, `check-am' only requires `all-am', and
+# `all-am' is what "all" means for this directory itself (Makefile, programs,
+# scripts, headers and the all-local hook).
+check-am: all-am
+check: check-recursive
+all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(HEADERS) all-local
+installdirs: installdirs-recursive
+# Create the installation directory used by this Makefile's own install
+# rules.  Only $(DESTDIR)$(bindir) is needed; it is listed once because
+# creating the same directory a second time is redundant.
+installdirs-am:
+	for dir in "$(DESTDIR)$(bindir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+# User-facing install/uninstall entry points; each one forwards to its
+# -recursive variant.
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+# Install for this directory only: after `all-am' is up to date, a sub-make
+# runs install-exec-am and install-data-am.
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+# Re-run `make install' with INSTALL_PROGRAM and install_sh_PROGRAM replaced
+# by $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG set to -s.  When $(STRIP)
+# is non-empty, INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)' is passed as well.
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+# Generic (non language-specific) clean steps.  The mostlyclean and clean
+# levels have nothing to do here.
+mostlyclean-generic:
+
+clean-generic:
+
+# Remove the files listed in CONFIG_CLEAN_FILES, if any; a failure of the
+# command is ignored because of the leading `-'.
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+# Only prints a notice; it removes nothing itself.
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+# `clean' forwards to clean-recursive; clean-am is the part for this
+# directory (installed and non-installed programs, generic files, plus
+# everything mostlyclean-am removes).
+clean: clean-recursive
+
+clean-am: clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \
+	mostlyclean-am
+
+# After the recursive distclean has finished, also remove the dependency
+# directory and the generated Makefile itself (failures ignored).
+distclean: distclean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+# Remaining standard targets.  A target named `X' forwards to `X-recursive';
+# the matching `X-am' target is what the recursive rules run for this
+# directory itself.  An `X-am' target with neither prerequisites nor recipe
+# means there is nothing of that kind to do in this directory.
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-exec-am: install-binPROGRAMS install-binSCRIPTS \
+	install-exec-local
+
+install-html: install-html-recursive
+
+install-info: install-info-recursive
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-ps: install-ps-recursive
+
+installcheck-am:
+
+# Like distclean: once the recursion is done, remove the dependency directory
+# and the generated Makefile (failures ignored).
+maintainer-clean: maintainer-clean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS
+
+# Targets whose recipes invoke $(MAKE) again.
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
+	install-strip
+
+# Targets that do not name files, so make must never treat them as up to date
+# merely because a file of the same name exists.
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+	all all-am all-local check check-am clean clean-binPROGRAMS \
+	clean-generic clean-noinstPROGRAMS ctags ctags-recursive \
+	distclean distclean-compile distclean-generic distclean-tags \
+	distdir dvi dvi-am html html-am info info-am install \
+	install-am install-binPROGRAMS install-binSCRIPTS install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-exec-local install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	installdirs-am maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \
+	ps ps-am tags tags-recursive uninstall uninstall-am \
+	uninstall-binPROGRAMS uninstall-binSCRIPTS
+
+
+# The kludge that passes for vex's build system can't handle parallel
+# builds.  So, for the time being, serialise all Valgrind building.
+# (This is equivalent to enforcing "make -j 1".)
+.NOTPARALLEL:
+
+# This is used by coregrind/Makefile.am and Makefile.tool.am for doing
+# "in-place" installs.  Nothing is copied: for each of $(noinst_PROGRAMS) any
+# existing entry in $(inplacedir) is removed and replaced by a relative
+# symlink to ../$(subdir)/<program>.  $(inplacedir) is created if needed, and
+# the whole step is skipped when $(noinst_PROGRAMS) is empty.
+# It needs to be depended on by an 'all-local' rule.
+inplace-noinst_PROGRAMS:
+	if [ -n "$(noinst_PROGRAMS)" ] ; then \
+	  mkdir -p $(inplacedir); \
+	  for f in $(noinst_PROGRAMS) ; do \
+	    rm -f $(inplacedir)/$$f; \
+	    ln -f -s ../$(subdir)/$$f $(inplacedir); \
+	  done ; \
+	fi
+
+# This is used by coregrind/Makefile.am and by <tool>/Makefile.am for doing
+# "make install".  It installs each of $(noinst_PROGRAMS) with
+# $(INSTALL_PROGRAM) into $(DESTDIR)$(valdir), creating that directory first.
+# (valdir is defined elsewhere; presumably it is $prefix/lib/valgrind/.)
+# The whole step is skipped when $(noinst_PROGRAMS) is empty.
+# It needs to be depended on by an 'install-exec-local' rule.
+install-noinst_PROGRAMS:
+	if [ -n "$(noinst_PROGRAMS)" ] ; then \
+	  $(mkinstalldirs) $(DESTDIR)$(valdir); \
+	  for f in $(noinst_PROGRAMS); do \
+	    $(INSTALL_PROGRAM) $$f $(DESTDIR)$(valdir); \
+	  done ; \
+	fi
+
+# Per-platform libvex archives.  Each one is produced by a sub-make run inside
+# the VEX directory, and each depends on priv/main/vex_svnversion.h there.
+# CC and AR are handed down from this Makefile (the two AIX variants append
+# -X32 / -X64 to AR), and EXTRA_CFLAGS carries the platform's AM_CFLAGS_*
+# value followed by two flags substituted by configure.
+@VEX_DIR@/libvex-x86-linux.a: @VEX_DIR@/priv/main/vex_svnversion.h
+	$(MAKE) -C @VEX_DIR@ CC="$(CC)" AR="$(AR)" \
+	libvex-x86-linux.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_X86_LINUX) @FLAG_WDECL_AFTER_STMT@ \
+			@FLAG_FNO_STACK_PROTECTOR@"
+
+@VEX_DIR@/libvex-amd64-linux.a: @VEX_DIR@/priv/main/vex_svnversion.h
+	$(MAKE) -C @VEX_DIR@ CC="$(CC)" AR="$(AR)" \
+	libvex-amd64-linux.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_AMD64_LINUX) @FLAG_WDECL_AFTER_STMT@ \
+			@FLAG_FNO_STACK_PROTECTOR@"
+
+@VEX_DIR@/libvex-ppc32-linux.a: @VEX_DIR@/priv/main/vex_svnversion.h
+	$(MAKE) -C @VEX_DIR@ CC="$(CC)" AR="$(AR)" \
+	libvex-ppc32-linux.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_PPC32_LINUX) @FLAG_WDECL_AFTER_STMT@ \
+			@FLAG_FNO_STACK_PROTECTOR@"
+
+@VEX_DIR@/libvex-ppc64-linux.a: @VEX_DIR@/priv/main/vex_svnversion.h
+	$(MAKE) -C @VEX_DIR@ CC="$(CC)" AR="$(AR)" \
+	libvex-ppc64-linux.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_PPC64_LINUX) @FLAG_WDECL_AFTER_STMT@ \
+			@FLAG_FNO_STACK_PROTECTOR@"
+
+@VEX_DIR@/libvex-ppc32-aix5.a: @VEX_DIR@/priv/main/vex_svnversion.h
+	$(MAKE) -C @VEX_DIR@ CC="$(CC)" AR="$(AR) -X32" \
+	libvex-ppc32-aix5.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_PPC32_AIX5) @FLAG_WDECL_AFTER_STMT@ \
+			@FLAG_FNO_STACK_PROTECTOR@"
+
+@VEX_DIR@/libvex-ppc64-aix5.a: @VEX_DIR@/priv/main/vex_svnversion.h
+	$(MAKE) -C @VEX_DIR@ CC="$(CC)" AR="$(AR) -X64" \
+	libvex-ppc64-aix5.a \
+	EXTRA_CFLAGS="$(AM_CFLAGS_PPC64_AIX5) @FLAG_WDECL_AFTER_STMT@ \
+			@FLAG_FNO_STACK_PROTECTOR@"
+
+# The version header is produced by the `version' target of the VEX
+# directory's own Makefile.  The rule has no prerequisites, so it only runs
+# while the header does not exist.
+@VEX_DIR@/priv/main/vex_svnversion.h:
+	$(MAKE) -C @VEX_DIR@ CC="$(CC)" version
+
+# Hook the in-place symlinking of noinst programs into `all' ...
+all-local: inplace-noinst_PROGRAMS
+
+# ... and their real installation into `install-exec'.
+install-exec-local: install-noinst_PROGRAMS
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/cachegrind/cachegrind-x86-linux b/cachegrind/cachegrind-x86-linux
new file mode 100755
index 0000000..606a542
--- /dev/null
+++ b/cachegrind/cachegrind-x86-linux
diff --git a/cachegrind/cg-amd64.c b/cachegrind/cg-amd64.c
new file mode 100644
index 0000000..9b0c653
--- /dev/null
+++ b/cachegrind/cg-amd64.c
@@ -0,0 +1,35 @@
+
+/*--------------------------------------------------------------------*/
+/*--- AMD64-specific definitions.                       cg-amd64.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "cg-x86.c"
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg-ppc32.c b/cachegrind/cg-ppc32.c
new file mode 100644
index 0000000..570e208
--- /dev/null
+++ b/cachegrind/cg-ppc32.c
@@ -0,0 +1,64 @@
+
+/*--------------------------------------------------------------------*/
+/*--- PPC32-specific definitions.                       cg-ppc32.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2005-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcprint.h"
+
+#include "cg_arch.h"
+
+/* PPC32 has no cache auto-detection here: the three caches are always set
+   to fixed defaults, and a warning is printed unless the caller says every
+   cache was already specified on the command line. */
+void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
+                           Bool all_caches_clo_defined)
+{
+   // Default geometries; I1 and D1 share the same one.
+   const cache_t default_L1 = {  65536, 2, 64 };
+   const cache_t default_L2 = { 262144, 8, 64 };
+
+   *I1c = default_L1;
+   *D1c = default_L1;
+   *L2c = default_L2;
+
+   // Nothing to report when the command line specified every cache.
+   if (all_caches_clo_defined)
+      return;
+
+   // The wording below deliberately differs slightly from the message given
+   // on x86/AMD64 when auto-detection fails.  That lets the regression test
+   // suite filter out this (unimportant) message without also filtering the
+   // x86/AMD64 one, which we do want to see if it ever occurs there.
+   //
+   // If you change this message, please update
+   // cachegrind/tests/filter_stderr!
+   VG_DMSG("Warning: Cannot auto-detect cache config on PPC32, using one "
+           "or more defaults ");
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg-ppc64.c b/cachegrind/cg-ppc64.c
new file mode 100644
index 0000000..beb1f34
--- /dev/null
+++ b/cachegrind/cg-ppc64.c
@@ -0,0 +1,64 @@
+
+/*--------------------------------------------------------------------*/
+/*--- PPC64-specific definitions.                       cg-ppc64.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2005-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcprint.h"
+
+#include "cg_arch.h"
+
+/* PPC64 has no cache auto-detection here: the three caches are always set
+   to fixed defaults, and a warning is printed unless the caller says every
+   cache was already specified on the command line. */
+void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
+                           Bool all_caches_clo_defined)
+{
+   // Default geometries; I1 and D1 share the same one.
+   const cache_t default_L1 = {  65536, 2, 64 };
+   const cache_t default_L2 = { 262144, 8, 64 };
+
+   *I1c = default_L1;
+   *D1c = default_L1;
+   *L2c = default_L2;
+
+   // Nothing to report when the command line specified every cache.
+   if (all_caches_clo_defined)
+      return;
+
+   // The wording below deliberately differs slightly from the message given
+   // on x86/AMD64 when auto-detection fails.  That lets the regression test
+   // suite filter out this (unimportant) message without also filtering the
+   // x86/AMD64 one, which we do want to see if it ever occurs there.
+   //
+   // If you change this message, please update
+   // cachegrind/tests/filter_stderr!
+   VG_DMSG("Warning: Cannot auto-detect cache config on PPC64, using one "
+           "or more defaults ");
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg-x86.c b/cachegrind/cg-x86.c
new file mode 100644
index 0000000..be5eb82
--- /dev/null
+++ b/cachegrind/cg-x86.c
@@ -0,0 +1,352 @@
+
+/*--------------------------------------------------------------------*/
+/*--- x86-specific (and AMD64-specific) definitions.      cg-x86.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_cpuid.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcprint.h"
+
+#include "cg_arch.h"
+
+// All CPUID info taken from sandpile.org/a32/cpuid.htm
+// Probably only works for Intel and AMD chips, and probably only for some of
+// them.
+
+/* Print the two-line notice used when a Pentium 4 trace cache (whose size is
+   given in micro-ops) is approximated by a conventional I-cache.
+   actual_size is the reported trace-cache size, used_size the I-cache size
+   simulated instead, and line_size the line size assumed for it. */
+static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
+{
+   /* First line: what was detected. */
+   VG_DMSG(
+      "warning: Pentium 4 with %d KB micro-op instruction trace cache",
+      actual_size
+   );
+   /* Second line: what is simulated in its place. */
+   VG_DMSG(
+      "         Simulating a %d KB I-cache with %d B lines",
+      used_size, line_size
+   );
+}
+
+/* Intel method is truly wretched.  We have to do an insane indexing into an
+ * array of pre-defined configurations for various parts of the memory
+ * hierarchy.
+ * According to Intel Processor Identification, App Note 485.
+ *
+ * 'level' is the CPUID level supplied by the caller; below 2 the descriptor
+ * leaf (CPUID 2) cannot be used.  Each of the 16 bytes returned by CPUID 2
+ * (with AL cleared) is treated as a descriptor; a recognised cache
+ * descriptor overwrites *I1c, *D1c or *L2c, everything else is ignored or
+ * reported with a warning.  Caches for which no descriptor is seen are left
+ * untouched.
+ *
+ * Returns 0 on success, -1 if 'level' is too low or if the AL byte of
+ * CPUID 2 (the "trials" count) is not 1.
+ */
+static
+Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   Int cpuid1_eax;
+   Int cpuid1_ignore;
+   Int family;
+   Int model;
+   UChar info[16];
+   Int   i, trials;
+   Bool  L2_found = False;
+
+   if (level < 2) {
+      VG_DMSG("warning: CPUID level < 2 for Intel processor (%d)", level);
+      return -1;
+   }
+
+   /* family/model needed to distinguish code reuse (currently 0x49) */
+   VG_(cpuid)(1, &cpuid1_eax, &cpuid1_ignore,
+              &cpuid1_ignore, &cpuid1_ignore);
+   family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
+   model =  (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
+
+   VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4], 
+                 (Int*)&info[8], (Int*)&info[12]);
+   trials  = info[0] - 1;   /* AL register - bits 0..7 of %eax */
+   info[0] = 0x0;           /* reset AL */
+
+   if (0 != trials) {
+      VG_DMSG("warning: non-zero CPUID trials for Intel processor (%d)",
+              trials);
+      return -1;
+   }
+
+   for (i = 0; i < 16; i++) {
+
+      switch (info[i]) {
+
+      case 0x0:       /* ignore zeros */
+          break;
+          
+      /* TLB info, ignore */
+      case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
+      case 0x4f: case 0x50: case 0x51: case 0x52:
+      case 0x56: case 0x57: case 0x59:
+      case 0x5b: case 0x5c: case 0x5d:
+      case 0xb0: case 0xb1:
+      case 0xb3: case 0xb4: case 0xba: case 0xc0:
+          break;      
+
+      case 0x06: *I1c = (cache_t) {  8, 4, 32 }; break;
+      case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
+      case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
+
+      case 0x0a: *D1c = (cache_t) {  8, 2, 32 }; break;
+      case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
+      case 0x0e: *D1c = (cache_t) { 24, 6, 64 }; break;
+      case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
+
+      /* IA-64 info -- panic! */
+      case 0x10: case 0x15: case 0x1a: 
+      case 0x88: case 0x89: case 0x8a: case 0x8d:
+      case 0x90: case 0x96: case 0x9b:
+         VG_(tool_panic)("IA-64 cache detected?!");
+
+      case 0x22: case 0x23: case 0x25: case 0x29:
+      case 0x46: case 0x47: case 0x4a: case 0x4b: case 0x4c: case 0x4d:
+          VG_DMSG("warning: L3 cache detected but ignored");
+          break;
+
+      /* These are sectored, whatever that means */
+      case 0x39: *L2c = (cache_t) {  128, 4, 64 }; L2_found = True; break;
+      case 0x3c: *L2c = (cache_t) {  256, 4, 64 }; L2_found = True; break;
+
+      /* If a P6 core, this means "no L2 cache".  
+         If a P4 core, this means "no L3 cache".
+         We don't know what core it is, so don't issue a warning.  To detect
+         a missing L2 cache, we use 'L2_found'. */
+      case 0x40:
+          break;
+
+      case 0x41: *L2c = (cache_t) {  128, 4, 32 }; L2_found = True; break;
+      case 0x42: *L2c = (cache_t) {  256, 4, 32 }; L2_found = True; break;
+      case 0x43: *L2c = (cache_t) {  512, 4, 32 }; L2_found = True; break;
+      case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
+      case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
+      case 0x48: *L2c = (cache_t) { 3072,12, 64 }; L2_found = True; break;
+      case 0x49:
+          if ((family == 15) && (model == 6)) {
+              /* On Xeon MP (family F, model 6), this is for L3 */
+              VG_DMSG("warning: L3 cache detected but ignored");
+          } else {
+              /* The braces matter: L2_found may only be set when the
+                 descriptor really describes an L2 cache, otherwise the
+                 "L2 cache not installed" warning below is suppressed. */
+              *L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
+          }
+          break;
+      case 0x4e: *L2c = (cache_t) { 6144, 24, 64 }; L2_found = True; break;
+
+      /* These are sectored, whatever that means */
+      case 0x60: *D1c = (cache_t) { 16, 8, 64 };  break;      /* sectored */
+      case 0x66: *D1c = (cache_t) {  8, 4, 64 };  break;      /* sectored */
+      case 0x67: *D1c = (cache_t) { 16, 4, 64 };  break;      /* sectored */
+      case 0x68: *D1c = (cache_t) { 32, 4, 64 };  break;      /* sectored */
+
+      /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
+       * conversion to byte size is a total guess;  treat the 12K and 16K
+       * cases the same since the cache byte size must be a power of two for
+       * everything to work!.  Also guessing 32 bytes for the line size... 
+       */
+      case 0x70:    /* 12K micro-ops, 8-way */
+         *I1c = (cache_t) { 16, 8, 32 };  
+         micro_ops_warn(12, 16, 32);
+         break;  
+      case 0x71:    /* 16K micro-ops, 8-way */
+         *I1c = (cache_t) { 16, 8, 32 };  
+         micro_ops_warn(16, 16, 32); 
+         break;  
+      case 0x72:    /* 32K micro-ops, 8-way */
+         *I1c = (cache_t) { 32, 8, 32 };  
+         micro_ops_warn(32, 32, 32); 
+         break;  
+
+      /* These are sectored, whatever that means */
+      case 0x79: *L2c = (cache_t) {  128, 8,  64 }; L2_found = True;  break;
+      case 0x7a: *L2c = (cache_t) {  256, 8,  64 }; L2_found = True;  break;
+      case 0x7b: *L2c = (cache_t) {  512, 8,  64 }; L2_found = True;  break;
+      case 0x7c: *L2c = (cache_t) { 1024, 8,  64 }; L2_found = True;  break;
+      case 0x7d: *L2c = (cache_t) { 2048, 8,  64 }; L2_found = True;  break;
+      case 0x7e: *L2c = (cache_t) {  256, 8, 128 }; L2_found = True;  break;
+
+      case 0x7f: *L2c = (cache_t) {  512, 2, 64 };  L2_found = True;  break;
+      case 0x80: *L2c = (cache_t) {  512, 8, 64 };  L2_found = True;  break;
+
+      case 0x81: *L2c = (cache_t) {  128, 8, 32 };  L2_found = True;  break;
+      case 0x82: *L2c = (cache_t) {  256, 8, 32 };  L2_found = True;  break;
+      case 0x83: *L2c = (cache_t) {  512, 8, 32 };  L2_found = True;  break;
+      case 0x84: *L2c = (cache_t) { 1024, 8, 32 };  L2_found = True;  break;
+      case 0x85: *L2c = (cache_t) { 2048, 8, 32 };  L2_found = True;  break;
+      case 0x86: *L2c = (cache_t) {  512, 4, 64 };  L2_found = True;  break;
+      case 0x87: *L2c = (cache_t) { 1024, 8, 64 };  L2_found = True;  break;
+
+      /* Ignore prefetch information */
+      case 0xf0: case 0xf1:
+         break;
+
+      default:
+         VG_DMSG("warning: Unknown Intel cache config value (0x%x), ignoring",
+                 info[i]);
+         break;
+      }
+   }
+
+   if (!L2_found)
+      VG_DMSG("warning: L2 cache not installed, ignore L2 results.");
+
+   return 0;
+}
+
+/* AMD method is straightforward, just extract appropriate bits from the
+ * result registers.
+ *
+ * Bits, for D1 and I1:
+ *  31..24  data L1 cache size in KBs
+ *  23..16  data L1 cache associativity (FFh=full)
+ *  15.. 8  data L1 cache lines per tag
+ *   7.. 0  data L1 cache line size in bytes
+ *
+ * Bits, for L2:
+ *  31..16  unified L2 cache size in KBs
+ *  15..12  unified L2 cache associativity code (0=off, Fh=full), see below
+ *  11.. 8  unified L2 cache lines per tag
+ *   7.. 0  unified L2 cache line size in bytes
+ *
+ * #3  The AMD K7 processor's L2 cache must be configured prior to relying
+ *     upon this information. (Whatever that means -- njn)
+ *
+ * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
+ * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
+ * so we detect that.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static
+Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   /* L2 associativity is a code, not a way count (eg. 6 = 8-way).  Off,
+    * reserved and fully-associative codes map to 1 (direct-mapped). */
+   static const Int L2_ways[16] = {  1, 1,  2,  1,  4,  1,   8, 1,
+                                    16, 1, 32, 48, 64, 96, 128, 1 };
+   UInt ext_level, dummy, model, I1i, D1i, L2i;
+
+   VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy);
+   if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
+      VG_DMSG("warning: ext_level < 0x80000006 for AMD processor (0x%x)", 
+              ext_level);
+      return -1;
+   }
+
+   VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i);
+   VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &dummy);
+   VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy);
+
+   /* Check for Duron bug */
+   if (model == 0x630) {
+      VG_DMSG("warning: Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
+      L2i = (64 << 16) | (L2i & 0xffff);
+   }
+
+   D1c->size      = (D1i >> 24) & 0xff;
+   D1c->assoc     = (D1i >> 16) & 0xff;
+   D1c->line_size = (D1i >>  0) & 0xff;
+
+   I1c->size      = (I1i >> 24) & 0xff;
+   I1c->assoc     = (I1i >> 16) & 0xff;
+   I1c->line_size = (I1i >>  0) & 0xff;
+
+   L2c->size      = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
+   L2c->assoc     = L2_ways[(L2i >> 12) & 0xf];   /* decode, don't copy */
+   L2c->line_size = (L2i >>  0) & 0xff;
+
+   return 0;
+}
+
+/* Fill in I1c/D1c/L2c from CPUID, dispatching on the CPU vendor string.
+ * Sizes are converted from KB to bytes only when detection succeeds; on
+ * failure the caches are not rescaled.  Returns 0 on success, non-zero on
+ * failure. */
+static
+Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   Int  level, ret;
+   Char vendor_id[13];
+
+   if (!VG_(has_cpuid)()) {
+      VG_DMSG("CPUID instruction not supported");
+      return -1;
+   }
+
+   VG_(cpuid)(0, &level, (int*)&vendor_id[0],
+              (int*)&vendor_id[8], (int*)&vendor_id[4]);
+   vendor_id[12] = '\0';
+
+   if (0 == level) {
+      VG_DMSG("CPUID level is 0, early Pentium?");
+      return -1;
+   }
+
+   /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
+   if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
+      ret = Intel_cache_info(level, I1c, D1c, L2c);
+   } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
+      ret = AMD_cache_info(I1c, D1c, L2c);
+   } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
+      /* Total kludge.  Pretend to be a VIA Nehemiah. */
+      *D1c = (cache_t) { 64, 16, 16 };
+      *I1c = (cache_t) { 64,  4, 16 };
+      *L2c = (cache_t) { 64, 16, 16 };
+      ret = 0;
+   } else {
+      VG_DMSG("CPU vendor ID not recognised (%s)", vendor_id);
+      return -1;
+   }
+
+   /* A failed query may have left the caller's byte-valued defaults in
+    * place;  multiplying those by 1024 would corrupt them, so stop here. */
+   if (0 != ret)
+      return ret;
+
+   /* Successful!  Convert sizes from KB to bytes */
+   I1c->size *= 1024;
+   D1c->size *= 1024;
+   L2c->size *= 1024;
+
+   return 0;
+}
+
+
+void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
+                           Bool all_caches_clo_defined)
+{
+   // Defaults, kept for any cache that CPUID detection does not fill in.
+   // I1 and D1 share the same default.
+   const cache_t default_L1 = {  65536, 2, 64 };
+   const cache_t default_L2 = { 262144, 8, 64 };
+
+   *I1c = default_L1;
+   *D1c = default_L1;
+   *L2c = default_L2;
+
+   // Let CPUID overwrite the defaults where it can.  A failure only merits
+   // a warning if the command line left some cache unspecified.
+   if (0 != get_caches_from_CPUID(I1c, D1c, L2c) && !all_caches_clo_defined) {
+      VG_DMSG("Warning: Couldn't auto-detect cache config, using one "
+              "or more defaults ");
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg_annotate b/cachegrind/cg_annotate
new file mode 100644
index 0000000..9cfa873
--- /dev/null
+++ b/cachegrind/cg_annotate
@@ -0,0 +1,905 @@
+#! /usr/bin/perl
+
+##--------------------------------------------------------------------##
+##--- Cachegrind's annotator.                       cg_annotate.in ---##
+##--------------------------------------------------------------------##
+
+#  This file is part of Cachegrind, a Valgrind tool for cache
+#  profiling programs.
+#
+#  Copyright (C) 2002-2005 Nicholas Nethercote
+#     njn@valgrind.org
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License as
+#  published by the Free Software Foundation; either version 2 of the
+#  License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+#  02111-1307, USA.
+#
+#  The GNU General Public License is contained in the file COPYING.
+
+#----------------------------------------------------------------------------
+# The file format is simple, basically printing the cost centre for every
+# source line, grouped by files and functions.  The details are in
+# Cachegrind's manual.
+
+#----------------------------------------------------------------------------
+# Performance improvements record, using cachegrind.out for cacheprof, doing no
+# source annotation (irrelevant ones removed):
+#                                                               user time
+# 1. turned off warnings in add_hash_a_to_b()                   3.81 --> 3.48s
+#    [now add_array_a_to_b()]
+# 6. make line_to_CC() return a ref instead of a hash           3.01 --> 2.77s
+#
+#10. changed file format to avoid file/fn name repetition       2.40s
+#    (not sure why higher;  maybe due to new '.' entries?)
+#11. changed file format to drop unnecessary end-line "."s      2.36s
+#    (shrunk file by about 37%)
+#12. switched from hash CCs to array CCs                        1.61s
+#13. only adding b[i] to a[i] if b[i] defined (was doing it if
+#    either a[i] or b[i] was defined, but if b[i] was undefined
+#    it just added 0)                                           1.48s
+#14. Stopped converting "." entries to undef and then back      1.16s
+#15. Using foreach $i (x..y) instead of for ($i = 0...) in
+#    add_array_a_to_b()                                         1.11s
+#
+# Auto-annotating primes:
+#16. Finding count lengths by int((length-1)/3), not by
+#    commifying (halves the number of commify calls)            1.68s --> 1.47s
+
+use warnings;
+use strict;
+
+#----------------------------------------------------------------------------
+# Overview: the running example in the comments is for:
+#   - events = A,B,C,D
+#   - --show=C,A,D
+#   - --sort=D,C
+#----------------------------------------------------------------------------
+
+#----------------------------------------------------------------------------
+# Global variables, main data structures
+#----------------------------------------------------------------------------
+# CCs are arrays, the counts corresponding to @events, with 'undef'
+# representing '.'.  This makes things fast (faster than using hashes for CCs)
+# but we have to use @sort_order and @show_order below to handle the --sort and
+# --show options, which is a bit tricky.
+#----------------------------------------------------------------------------
+
+# Total counts for summary (an array reference).
+my $summary_CC;
+
+# Totals for each function, for overall summary.
+# hash(filename:fn_name => CC array)
+my %fn_totals;
+
+# Individual CCs, organised by filename and line_num for easy annotation.
+# hash(filename => hash(line_num => CC array))
+my %all_ind_CCs;
+
+# Files chosen for annotation on the command line.
+# key = filename exactly as given on the command line, value = 1
+my %user_ann_files;
+
+# Generic description string.
+my $desc = "";
+
+# Command line of profiled program.
+my $cmd;
+
+# Events in input file, eg. (A,B,C,D)
+my @events;
+
+# Events to show, from command line, eg. (C,A,D)
+my @show_events;
+
+# Map from @show_events indices to @events indices, eg. (2,0,3).  Gives the
+# order in which we must traverse @events in order to show the @show_events, 
+# eg. ($events[$show_order[0]], $events[$show_order[1]], ...) = @show_events.
+# (Might help to think of it like a hash (0 => 2, 1 => 0, 2 => 3).)
+my @show_order;
+
+# Print out the function totals sorted by these events, eg. (D,C).
+my @sort_events;
+
+# Map from @sort_events indices to @events indices, eg. (3,2).  Same idea as
+# for @show_order.
+my @sort_order;
+
+# Thresholds, one for each sort event (or default to 1 if no sort events
+# specified).  We print out functions and do auto-annotations until we've
+# handled this proportion of all the events thresholded.
+my @thresholds;
+
+my $default_threshold = 99;
+
+my $single_threshold  = $default_threshold;
+
+# If on, automatically annotates all files that are involved in getting over
+# all the threshold counts.
+my $auto_annotate = 0;
+
+# Number of lines to show around each annotated line.
+my $context = 8;
+
+# Directories in which to look for annotation files.
+my @include_dirs = ("");
+
+# Input file name
+my $input_file = undef;
+
+# Version number
+my $version = "3.5.0.SVN";
+
+# Usage message.
+my $usage = <<END
+usage: cg_annotate [options] output-file [source-files]
+
+  options for the user, with defaults in [ ], are:
+    -h --help             show this message
+    -v --version          show version
+    --show=A,B,C          only show figures for events A,B,C [all]
+    --sort=A,B,C          sort columns by events A,B,C [event column order]
+    --threshold=<0--100>  percentage of counts (of primary sort event) we
+                          are interested in [$default_threshold%]
+    --auto=yes|no         annotate all source files containing functions
+                          that helped reach the event count threshold [no]
+    --context=N           print N lines of context before and after
+                          annotated lines [8]
+    -I<d> --include=<d>   add <d> to list of directories to search for 
+                          source files
+
+  cg_annotate is Copyright (C) 2002-2007 Nicholas Nethercote.
+  and licensed under the GNU General Public License, version 2.
+  Bug reports, feedback, admiration, abuse, etc, to: njn\@valgrind.org.
+                                                
+END
+;
+
+# Used in various places of output.
+my $fancy = '-' x 80 . "\n";
+
+#-----------------------------------------------------------------------------
+# Argument and option handling (process_cmd_line reads @ARGV, dies on errors)
+#-----------------------------------------------------------------------------
+sub process_cmd_line() 
+{
+    for my $arg (@ARGV) { 
+
+        # Option handling: anything starting with '-' is treated as an option
+        if ($arg =~ /^-/) {
+
+            # -v, --version (reported via die, like the usage message)
+            if ($arg =~ /^-v$|^--version$/) {
+                die("cg_annotate-$version\n");
+
+            # --show=A,B,C
+            } elsif ($arg =~ /^--show=(.*)$/) {
+                @show_events = split(/,/, $1);
+
+            # --sort=A,B,C
+            #   Nb: You can specify thresholds individually, eg.
+            #   --sort=A:99,B:95,C:90.  These will override any --threshold
+            #   argument.
+            } elsif ($arg =~ /^--sort=(.*)$/) {
+                @sort_events = split(/,/, $1);
+                my $th_specified = 0;
+                foreach my $i (0 .. scalar @sort_events - 1) {
+                    if ($sort_events[$i] =~ /.*:([\d\.]+)%?$/) {
+                        my $th = $1;
+                        ($th >= 0 && $th <= 100) or die($usage);
+                        $sort_events[$i] =~ s/:.*//;
+                        $thresholds[$i] = $th;
+                        $th_specified = 1;
+                    } else {
+                        $thresholds[$i] = 0;
+                    }
+                }
+                if (not $th_specified) {
+                    @thresholds = ();
+                }
+
+            # --threshold=X (tolerates a trailing '%')
+            } elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) {
+                $single_threshold = $1;
+                ($1 >= 0 && $1 <= 100) or die($usage);
+
+            # --auto=yes|no
+            } elsif ($arg =~ /^--auto=yes$/) {
+                $auto_annotate = 1;
+            } elsif ($arg =~ /^--auto=no$/) {
+                $auto_annotate = 0;
+
+            # --context=N (the pattern admits only digits and '.', no sign)
+            } elsif ($arg =~ /^--context=([\d\.]+)$/) {
+                $context = $1;
+                if ($context < 0) {
+                    die($usage);
+                }
+
+            # We don't handle "-I name" -- there can be no space.
+            } elsif ($arg =~ /^-I$/) {
+                die("Sorry, no space is allowed after a -I flag\n");
+            
+            # --include=<d> or -I<d>.  Allow -I=name for backwards compatibility.
+            } elsif ($arg =~ /^(-I=|-I|--include=)(.*)$/) {
+                my $inc = $2;
+                $inc =~ s|/$||;         # trim trailing '/'
+                push(@include_dirs, "$inc/");
+
+            } else {            # -h and --help fall under this case
+                die($usage);
+            }
+
+        # Argument handling -- annotation file checking and selection.
+        # Stick filenames into a hash for quick 'n easy lookup throughout.
+        } else {
+            if (not defined $input_file) {
+                # First non-option argument is the output file.
+                $input_file = $arg;
+            } else {
+                # Subsequent non-option arguments are source files.  Only the
+                # include dirs seen so far on the command line are searched.
+                my $readable = 0;
+                foreach my $include_dir (@include_dirs) {
+                    if (-r $include_dir . $arg) {
+                        $readable = 1;
+                    }
+                }
+                $readable or die("File $arg not found in any of: @include_dirs\n");
+                $user_ann_files{$arg} = 1;
+            }
+        }
+    }
+
+    # Must have chosen an input file
+    if (not defined $input_file) {
+        die($usage);
+    }
+}
+
+#-----------------------------------------------------------------------------
+# Reading of input file
+#-----------------------------------------------------------------------------
+sub max ($$) 
+{
+    return $_[0] if ($_[0] > $_[1]);    # strictly greater: first wins
+    return $_[1];                       # otherwise (incl. ties): second
+}
+
+# Accumulate the counts of the first array into the second, in place.
+# Source entries that are undefined or "." are skipped altogether, so that an
+# undefined destination slot is not turned into 0 unnecessarily.  Where the
+# destination slot is undefined and the source is not, "+=" treats it as 0;
+# $^W is cleared around the loop and set back to 1 afterwards.
+sub add_array_a_to_b ($$) 
+{
+    my ($src, $dst) = @_;
+
+    my $last = max(scalar @$src, scalar @$dst) - 1;
+    $^W = 0;
+    for my $idx (0 .. $last) {
+        my $count = $src->[$idx];
+        next if (!defined $count || "." eq $count);
+        $dst->[$idx] += $count;
+    }
+    $^W = 1;
+}
+
+# Split a line of whitespace-separated event counts into a new CC array and
+# return a reference to it.  Dies if there are more counts than @events.
+sub line_to_CC ($)
+{
+    my $CC = [ split(/\s+/, $_[0]) ];
+    die("Line $.: too many event counts\n") if (@$CC > @events);
+    return $CC;
+}
+
+# Read $input_file into $desc, $cmd, @events, %fn_totals, %all_ind_CCs and
+# $summary_CC, and resolve the --show/--sort settings.  Dies on bad input.
+sub read_input_file() 
+{
+    open(INPUTFILE, "<", $input_file)        # 3-arg: name is used verbatim
+         || die "Cannot open $input_file for reading\n";
+
+    # Read "desc:" lines;  the match is anchored to the start of the line.
+    my $line;
+    while ($line = <INPUTFILE>) {
+        if ($line =~ s/^desc:\s+//) {
+            $desc .= $line;
+        } else {
+            last;
+        }
+    }
+
+    # Read "cmd:" line (already in $line from the loop above, unless EOF).
+    (defined $line && $line =~ s/^cmd:\s+//)
+        or die("Line $.: missing command line\n");
+    $cmd = $line;
+    chomp($cmd);    # Remove newline
+
+    # Read "events:" line.  We make a temporary hash in which the Nth event's
+    # value is N, which is useful for handling --show/--sort options below.
+    $line = <INPUTFILE>;
+    (defined $line && $line =~ s/^events:\s+//) 
+        or die("Line $.: missing events line\n");
+    @events = split(/\s+/, $line);
+    my %events;
+    @events{@events} = (0 .. $#events);     # hash slice: name => position
+
+    # If no --show arg given, default to showing all events in the file.
+    # If --show option is used, check all specified events appeared in the
+    # "events:" line.  Then initialise @show_order.
+    if (@show_events) {
+        foreach my $show_event (@show_events) {
+            (defined $events{$show_event}) or 
+                die("--show event `$show_event' did not appear in input\n");
+        }
+    } else {
+        @show_events = @events;
+    }
+    foreach my $show_event (@show_events) {
+        push(@show_order, $events{$show_event});
+    }
+
+    # Do as for --show, but if no --sort arg given, default to sorting by
+    # column order (ie. first column event is primary sort key, 2nd column is
+    # 2ndary key, etc).
+    if (@sort_events) {
+        foreach my $sort_event (@sort_events) {
+            (defined $events{$sort_event}) or 
+                die("--sort event `$sort_event' did not appear in input\n");
+        }
+    } else {
+        @sort_events = @events;
+    }
+    foreach my $sort_event (@sort_events) {
+        push(@sort_order, $events{$sort_event});
+    }
+
+    # If multiple threshold args weren't given via --sort, stick in the single
+    # threshold (either from --threshold if used, or the default otherwise) for
+    # the primary sort event, and 0% for the rest.
+    if (not @thresholds) {
+        foreach my $e (@sort_order) {
+            push(@thresholds, 0);
+        }
+        $thresholds[0] = $single_threshold;
+    }
+
+    my $curr_file;
+    my $curr_fn;
+    my $curr_name;
+
+    my $curr_fn_CC = [];
+    my $curr_file_ind_CCs = {};     # hash(line_num => CC)
+
+    # Read body of input file.
+    while (<INPUTFILE>) {
+        s/#.*$//;   # remove comments
+        if (s/^(\d+)\s+//) {
+            my $line_num = $1;
+            my $CC = line_to_CC($_);
+            add_array_a_to_b($CC, $curr_fn_CC);
+
+            # If curr_file is selected, add CC to curr_file list.  We look for
+            # full filename matches;  or, if auto-annotating, we have to
+            # remember everything -- we won't know until the end what's needed.
+            if ($auto_annotate ||
+                (defined $curr_file && defined $user_ann_files{$curr_file})) {
+                my $tmp = $curr_file_ind_CCs->{$line_num};
+                $tmp = [] unless defined $tmp;
+                add_array_a_to_b($CC, $tmp);
+                $curr_file_ind_CCs->{$line_num} = $tmp;
+            }
+
+        } elsif (s/^fn=(.*)$//) {
+            # Commit result from previous function
+            $fn_totals{$curr_name} = $curr_fn_CC if (defined $curr_name);
+
+            # Setup new one
+            $curr_fn = $1;
+            $curr_name = "$curr_file:$curr_fn";
+            $curr_fn_CC = $fn_totals{$curr_name};
+            $curr_fn_CC = [] unless (defined $curr_fn_CC);
+
+        } elsif (s/^fl=(.*)$//) {
+            $all_ind_CCs{$curr_file} = $curr_file_ind_CCs 
+                if (defined $curr_file);
+
+            $curr_file = $1;
+            $curr_file_ind_CCs = $all_ind_CCs{$curr_file};
+            $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs);
+
+        } elsif (s/^\s*$//) {
+            # blank, do nothing
+
+        } elsif (s/^summary:\s+//) {
+            # Finish up handling final filename/fn_name counts
+            $fn_totals{"$curr_file:$curr_fn"} = $curr_fn_CC 
+                if (defined $curr_file && defined $curr_fn);
+            $all_ind_CCs{$curr_file} = 
+                $curr_file_ind_CCs if (defined $curr_file);
+
+            $summary_CC = line_to_CC($_);
+            (scalar(@$summary_CC) == @events) 
+                or die("Line $.: summary event and total event mismatch\n");
+
+        } else {
+            warn("WARNING: line $. malformed, ignoring\n");
+        }
+    }
+
+    # Check if summary line was present
+    if (not defined $summary_CC) {
+        die("missing final summary line, aborting\n");
+    }
+
+    close(INPUTFILE);
+}
+
+#-----------------------------------------------------------------------------
+# Print options used
+#-----------------------------------------------------------------------------
+sub print_options ()
+{
+    # Banner and description first, then the basic settings, one per line.
+    print($fancy, $desc);
+    print("Command:          $cmd\n",
+          "Data file:        $input_file\n",
+          "Events recorded:  @events\n",
+          "Events shown:     @show_events\n",
+          "Event sort order: @sort_events\n",
+          "Thresholds:       @thresholds\n");
+
+    # Every include dir except the leading "" entry.  If that leaves nothing,
+    # an empty name is shown on the labelled line.
+    my ($include_dir, @more_dirs) = @include_dirs[1 .. $#include_dirs];
+    $include_dir = "" unless (defined $include_dir);
+    print("Include dirs:     $include_dir\n");
+    foreach my $include_dir (@more_dirs) {
+        print("                  $include_dir\n");
+    }
+
+    # Same layout for the files the user asked to have annotated.
+    my ($user_ann_file, @more_files) = keys %user_ann_files;
+    $user_ann_file = "" unless (defined $user_ann_file);
+    print("User annotated:   $user_ann_file\n");
+    foreach my $user_ann_file (@more_files) {
+        print("                  $user_ann_file\n");
+    }
+
+    my $is_on = "off"; $is_on = "on" if ($auto_annotate);
+    print("Auto-annotation:  $is_on\n");
+    print("\n");
+}
+
+#-----------------------------------------------------------------------------
+# Print summary and sorted function totals
+#-----------------------------------------------------------------------------
+sub mycmp ($$) 
+{
+    my ($lhs_CC, $rhs_CC) = @_;
+
+    # Walk the sort keys in priority order (eg. 3,2).  A count that is not
+    # defined is stood in for by -1.
+    foreach my $col (@sort_order) {
+        my $lhs = $lhs_CC->[$col];
+        my $rhs = $rhs_CC->[$col];
+        $lhs = -1 if (not defined $lhs);
+        $rhs = -1 if (not defined $rhs);
+
+        # Operands are swapped so that bigger counts sort first.
+        my $order = ($rhs <=> $lhs);
+        return $order if ($order != 0);
+    }
+
+    # No sort key told the two apart.
+    return 0;
+}
+
+sub commify ($) {
+    my $text = shift;
+    while ($text =~ s/^(\d+)(\d{3})/$1,$2/) { }     # one comma per pass
+    return $text;
+}
+
+# Counts can be very large, so rather than wasting screen space on fixed
+# columns we make each column exactly as wide as its widest entry (event
+# name included).  Returns a reference to the array of widths.
+sub compute_CC_col_widths (@) 
+{
+    my @CCs = @_;
+
+    # No column may be narrower than the name of its event.
+    my @widths = map { length($_) } @events;
+
+    # Widen any column in which some count needs more room.  Positions in
+    # @widths correspond to positions in each CC.
+    foreach my $CC (@CCs) {
+        foreach my $col (0 .. $#$CC) {
+            my $count = $CC->[$col];
+            next if (not defined $count);
+
+            # Printed width = characters in the count plus the commas that
+            # commify() will insert, ie. int((length-1)/3) of them.
+            my $raw_len = length $count;
+            my $shown   = $raw_len + int(($raw_len - 1) / 3);
+            $widths[$col] = max($widths[$col], $shown);
+        }
+    }
+
+    return \@widths;
+}
+
+# Emit one CC: the shown columns in @show_order order, each right-aligned to
+# its width in $CC_col_widths and followed by a space ("." if no count).
+sub print_CC ($$) 
+{
+    my ($CC, $widths) = @_;
+    foreach my $col (@show_order) {
+        my $text = (defined $CC->[$col]) ? commify($CC->[$col]) : ".";
+        my $pad  = $widths->[$col] - length($text);
+        print(' ' x $pad, "$text ");
+    }
+}
+
+# Print the header row: each shown event name, right-aligned in its column
+# (widths taken from the array reference passed in) and followed by a space.
+sub print_events ($)
+{
+    my ($widths) = @_;
+
+    foreach my $col (@show_order) {
+        my $name = $events[$col];
+        my $pad  = $widths->[$col] - length($name);
+        print(' ' x $pad, "$name ");
+    }
+}
+
+# Prints summary and function totals (with separate column widths, so that
+# function names aren't pushed over unnecessarily by huge summary figures).
+# Also returns a reference to a hash whose keys are all the files involved in
+# getting the event counts above the thresholds (ie. the interesting ones).
+# A sort event whose summary count is zero is handled without dividing by it.
+sub print_summary_and_fn_totals ()
+{
+    my @fn_fullnames = keys   %fn_totals;
+
+    # Work out the size of each column for printing (summary and functions
+    # separately).
+    my $summary_CC_col_widths = compute_CC_col_widths($summary_CC);
+    my      $fn_CC_col_widths = compute_CC_col_widths(values %fn_totals);
+
+    # Header and counts for summary
+    print($fancy);
+    print_events($summary_CC_col_widths);
+    print("\n");
+    print($fancy);
+    print_CC($summary_CC, $summary_CC_col_widths);
+    print(" PROGRAM TOTALS\n");
+    print("\n");
+
+    # Header for functions
+    print($fancy);
+    print_events($fn_CC_col_widths);
+    print(" file:function\n");
+    print($fancy);
+
+    # Sort function names into order dictated by --sort option.
+    @fn_fullnames = sort {
+        mycmp($fn_totals{$a}, $fn_totals{$b})
+    } @fn_fullnames;
+
+    # Assertion
+    (scalar @sort_order == scalar @thresholds) or 
+        die("sort_order length != thresholds length:\n",
+            "  @sort_order\n  @thresholds\n");
+
+    my $threshold_files       = {};
+    # @curr_totals has the same shape as @sort_order and @thresholds
+    my @curr_totals = (0) x @thresholds;
+
+    # Print functions, stopping when the threshold has been reached.
+    foreach my $fn_name (@fn_fullnames) {
+
+        # Stop when we've reached all the thresholds
+        my $reached_all_thresholds = 1;
+        foreach my $i (0 .. scalar @thresholds - 1) {
+            # A zero summary count cannot be divided by;  the proportion is
+            # then taken to be 0, which still satisfies a 0% threshold.
+            my $total = $summary_CC->[$sort_order[$i]];
+            my $prop  = ($total > 0 ? $curr_totals[$i] * 100 / $total : 0);
+            $reached_all_thresholds &&= ($prop >= $thresholds[$i]);
+        }
+        last if $reached_all_thresholds;
+
+        # Print function results
+        my $fn_CC = $fn_totals{$fn_name};
+        print_CC($fn_CC, $fn_CC_col_widths);
+        print(" $fn_name\n");
+
+        # Update the threshold counts
+        my $filename = $fn_name;
+        $filename =~ s/:.+$//;    # remove function name
+        $threshold_files->{$filename} = 1;
+        foreach my $i (0 .. scalar @sort_order - 1) {
+            $curr_totals[$i] += $fn_CC->[$sort_order[$i]] 
+                if (defined $fn_CC->[$sort_order[$i]]);
+        }
+    }
+    print("\n");
+
+    return $threshold_files;
+}
+
+#-----------------------------------------------------------------------------
+# Annotate selected files
+#-----------------------------------------------------------------------------
+
+# Print a banner warning that the given source file is more recent than the
+# input file ($input_file), so the annotations may not be correct.
+sub warning_on_src_more_recent_than_inputfile ($)
+{
+    my ($src_file) = @_;
+
+    # The whole banner is one here-document, printed as it stands.
+    print(<<END);
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Source file '$src_file' is more recent than input file '$input_file'.
+@ Annotations may not be correct.
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+END
+}
+
+# Print a warning that counts were recorded for lines past the end of
+# $src_file, with a probable cause.  ($excess_line_nums is not used here.)
+sub warning_on_nonexistent_lines ($$$)
+{
+    my ($src_more_recent_than_inputfile, $src_file, $excess_line_nums) = @_;
+    my $cause_and_solution;
+
+    if ($src_more_recent_than_inputfile) {
+        $cause_and_solution = <<END
+@@ cause:    '$src_file' has changed since information was gathered.
+@@           If so, a warning will have already been issued about this.
+@@ solution: Recompile program and rerun under "valgrind --cachesim=yes" to 
+@@           gather new information.
+END
+    # Files ending in .h get their own explanation (no solution is offered)
+    } elsif ($src_file =~ /\.h$/) {
+        $cause_and_solution = <<END
+@@ cause:    bug in the Valgrind's debug info reader that screws up with .h
+@@           files sometimes
+@@ solution: none, sorry
+END
+    } else {
+        $cause_and_solution = <<END
+@@ cause:    not sure, sorry
+END
+    }
+
+    my $warning = <<END
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@
+@@ Information recorded about lines past the end of '$src_file'.
+@@
+@@ Probable cause and solution:
+$cause_and_solution@@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+END
+;
+    print($warning);
+}
+
+# Annotate every selected source file (the user-chosen ones, plus the
+# threshold files when auto-annotating) with its per-line counts, then report
+# what percentage of each event's total the printed lines account for.
+sub annotate_ann_files($)
+{
+    my ($threshold_files) = @_; 
+
+    my %all_ann_files;
+    my @unfound_auto_annotate_files;
+    my $printed_totals_CC = [];
+
+    # If auto-annotating, add interesting files (but not "???")
+    if ($auto_annotate) {
+        delete $threshold_files->{"???"};
+        %all_ann_files = (%user_ann_files, %$threshold_files) 
+    } else {
+        %all_ann_files = %user_ann_files;
+    }
+
+    # Track if we did any annotations.
+    my $did_annotations = 0;
+
+    LOOP:
+    foreach my $src_file (keys %all_ann_files) {
+        my $opened_file = "";
+        my $full_file_name = "";
+        # Nb: include_dirs already includes "", so it works in the case
+        # where the filename has the full path.
+        foreach my $include_dir (@include_dirs) {
+            my $try_name = $include_dir . $src_file;
+            if (open(INPUTFILE, "< $try_name")) {
+                $opened_file    = $try_name;
+                $full_file_name = ($include_dir eq "" 
+                                  ? $src_file 
+                                  : "$include_dir + $src_file"); 
+                last;
+            }
+        }
+        
+        if (not $opened_file) {
+            # Failed to open the file.  If chosen on the command line, die.
+            # If arose from auto-annotation, print a little message.
+            if (defined $user_ann_files{$src_file}) {
+                die("File $src_file not opened in any of: @include_dirs\n");
+            } else {
+                push(@unfound_auto_annotate_files, $src_file);
+            }
+
+        } else {
+            # File header (distinguish between user- and auto-selected files).
+            print("$fancy");
+            my $ann_type = (defined $user_ann_files{$src_file} ? "User" : "Auto");
+            print("-- $ann_type-annotated source: $full_file_name\n");
+            print("$fancy");
+
+            # Get file's CCs
+            my $src_file_CCs = $all_ind_CCs{$src_file};
+            if (!defined $src_file_CCs) {
+                print("  No information has been collected for $src_file\n\n");
+                next LOOP;
+            }
+        
+            $did_annotations = 1;
+            
+            # Numeric, not lexicographic sort!
+            my @line_nums = sort {$a <=> $b} keys %$src_file_CCs;  
+
+            # If $src_file more recent than cachegrind.out, issue warning
+            my $src_more_recent_than_inputfile = 0;
+            if ((stat $opened_file)[9] > (stat $input_file)[9]) {
+                $src_more_recent_than_inputfile = 1;
+                warning_on_src_more_recent_than_inputfile($src_file);
+            }
+
+            # Work out the size of each column for printing
+            my $CC_col_widths = compute_CC_col_widths(values %$src_file_CCs);
+
+            # Events header
+            print_events($CC_col_widths);
+            print("\n\n");
+
+            # Shift out 0 if it's in the line numbers (from unknown entries,
+            # likely due to bugs in Valgrind's stabs debug info reader)
+            shift(@line_nums) if (@line_nums && 0 == $line_nums[0]);
+
+            # Finds interesting line ranges -- all lines with a CC, and all
+            # lines within $context lines of a line with a CC.
+            my $n = @line_nums;
+            my @pairs;
+            for (my $i = 0; $i < $n; $i++) {
+                push(@pairs, $line_nums[$i] - $context);   # lower marker
+                while ($i < $n-1 && 
+                       $line_nums[$i] + 2*$context >= $line_nums[$i+1]) {
+                    $i++;
+                }
+                push(@pairs, $line_nums[$i] + $context);   # upper marker
+            }
+
+            # Annotate chosen lines, tracking total counts of lines printed
+            $pairs[0] = 1 if (@pairs && $pairs[0] < 1);   # no ranges: leave @pairs empty
+            while (@pairs) {
+                my $low  = shift @pairs;
+                my $high = shift @pairs;
+                while ($. < $low-1) {
+                    my $tmp = <INPUTFILE>;
+                    last unless (defined $tmp);     # hack to detect EOF
+                }
+                my $src_line;
+                # Print line number, unless start of file
+                print("-- line $low " . '-' x 40 . "\n") if ($low != 1);
+                while (($. < $high) && ($src_line = <INPUTFILE>)) {
+                    if (defined $line_nums[0] && $. == $line_nums[0]) {
+                        print_CC($src_file_CCs->{$.}, $CC_col_widths);
+                        add_array_a_to_b($src_file_CCs->{$.}, 
+                                         $printed_totals_CC);
+                        shift(@line_nums);
+
+                    } else {
+                        print_CC( [], $CC_col_widths);
+                    }
+
+                    print(" $src_line");
+                }
+                # Print line number, unless EOF
+                if ($src_line) {
+                    print("-- line $high " . '-' x 40 . "\n");
+                } else {
+                    last;
+                }
+            }
+
+            # If there was info on lines past the end of the file...
+            if (@line_nums) {
+                foreach my $line_num (@line_nums) {
+                    print_CC($src_file_CCs->{$line_num}, $CC_col_widths);
+                    print(" <bogus line $line_num>\n");
+                }
+                print("\n");
+                warning_on_nonexistent_lines($src_more_recent_than_inputfile,
+                                             $src_file, \@line_nums);
+            }
+            print("\n");
+
+            # Print summary of counts attributed to file but not to any
+            # particular line (due to incomplete debug info).
+            if ($src_file_CCs->{0}) {
+                print_CC($src_file_CCs->{0}, $CC_col_widths);
+                print(" <counts for unidentified lines in $src_file>\n\n");
+            }
+            
+            close(INPUTFILE);
+        }
+    }
+
+    # Print list of unfound auto-annotate selected files.
+    if (@unfound_auto_annotate_files) {
+        print("$fancy");
+        print("The following files chosen for auto-annotation could not be found:\n");
+        print($fancy);
+        foreach my $f (@unfound_auto_annotate_files) {
+            print("  $f\n");
+        }
+        print("\n");
+    }
+
+    # If we did any annotating, print what proportion of events were covered by
+    # annotated lines above.
+    if ($did_annotations) {
+        my $percent_printed_CC;
+        foreach my $i (0 .. $#$summary_CC) {
+            my ($done, $total) = ($printed_totals_CC->[$i] || 0, $summary_CC->[$i]);
+            my $pct = ($total != 0 ? $done / $total * 100 : 0);   # 0 total => 0%
+            $percent_printed_CC->[$i] = sprintf("%.0f", $pct);
+        }
+        my $pp_CC_col_widths = compute_CC_col_widths($percent_printed_CC);
+        print($fancy);
+        print_events($pp_CC_col_widths);
+        print("\n");
+        print($fancy);
+        print_CC($percent_printed_CC, $pp_CC_col_widths);
+        print(" percentage of events annotated\n\n");
+    }
+}
+
+#----------------------------------------------------------------------------
+# "main()"
+#----------------------------------------------------------------------------
+process_cmd_line();       # presumably parses @ARGV; defined outside this view
+read_input_file();        # presumably loads the profile data; defined outside this view
+print_options();          # presumably echoes the settings; defined outside this view
+my $threshold_files = print_summary_and_fn_totals();   # hash ref keyed by file name
+annotate_ann_files($threshold_files);   # per-line annotation of the chosen files
+
+##--------------------------------------------------------------------##
+##--- end                                           cg_annotate.in ---##
+##--------------------------------------------------------------------##
+
+
diff --git a/cachegrind/cg_annotate.in b/cachegrind/cg_annotate.in
new file mode 100644
index 0000000..31e9506
--- /dev/null
+++ b/cachegrind/cg_annotate.in
@@ -0,0 +1,905 @@
+#! @PERL@
+
+##--------------------------------------------------------------------##
+##--- Cachegrind's annotator.                       cg_annotate.in ---##
+##--------------------------------------------------------------------##
+
+#  This file is part of Cachegrind, a Valgrind tool for cache
+#  profiling programs.
+#
+#  Copyright (C) 2002-2005 Nicholas Nethercote
+#     njn@valgrind.org
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License as
+#  published by the Free Software Foundation; either version 2 of the
+#  License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+#  02111-1307, USA.
+#
+#  The GNU General Public License is contained in the file COPYING.
+
+#----------------------------------------------------------------------------
+# The file format is simple, basically printing the cost centre for every
+# source line, grouped by files and functions.  The details are in
+# Cachegrind's manual.
+
+#----------------------------------------------------------------------------
+# Performance improvements record, using cachegrind.out for cacheprof, doing no
+# source annotation (irrelevant ones removed):
+#                                                               user time
+# 1. turned off warnings in add_hash_a_to_b()                   3.81 --> 3.48s
+#    [now add_array_a_to_b()]
+# 6. make line_to_CC() return a ref instead of a hash           3.01 --> 2.77s
+#
+#10. changed file format to avoid file/fn name repetition       2.40s
+#    (not sure why higher;  maybe due to new '.' entries?)
+#11. changed file format to drop unnecessary end-line "."s      2.36s
+#    (shrunk file by about 37%)
+#12. switched from hash CCs to array CCs                        1.61s
+#13. only adding b[i] to a[i] if b[i] defined (was doing it if
+#    either a[i] or b[i] was defined, but if b[i] was undefined
+#    it just added 0)                                           1.48s
+#14. Stopped converting "." entries to undef and then back      1.16s
+#15. Using foreach $i (x..y) instead of for ($i = 0...) in
+#    add_array_a_to_b()                                         1.11s
+#
+# Auto-annotating primes:
+#16. Finding count lengths by int((length-1)/3), not by
+#    commifying (halves the number of commify calls)            1.68s --> 1.47s
+
+use warnings;
+use strict;
+
+#----------------------------------------------------------------------------
+# Overview: the running example in the comments is for:
+#   - events = A,B,C,D
+#   - --show=C,A,D
+#   - --sort=D,C
+#----------------------------------------------------------------------------
+
+#----------------------------------------------------------------------------
+# Global variables, main data structures
+#----------------------------------------------------------------------------
+# CCs are arrays, the counts corresponding to @events, with 'undef'
+# representing '.'.  This makes things fast (faster than using hashes for CCs)
+# but we have to use @sort_order and @show_order below to handle the --sort and
+# --show options, which is a bit tricky.
+#----------------------------------------------------------------------------
+
+# Total counts for summary (an array reference).
+my $summary_CC;
+
+# Totals for each function, for overall summary.
+# hash(filename:fn_name => CC array)
+my %fn_totals;
+
+# Individual CCs, organised by filename and line_num for easy annotation.
+# hash(filename => hash(line_num => CC array))
+my %all_ind_CCs;
+
+# Files chosen for annotation on the command line.  
+# key = file name exactly as given on the command line, value = 1
+my %user_ann_files;
+
+# Text of the input file's "desc:" lines, concatenated.
+my $desc = "";
+
+# Command line of profiled program.
+my $cmd;
+
+# Events in input file, eg. (A,B,C,D)
+my @events;
+
+# Events to show, from command line, eg. (C,A,D)
+my @show_events;
+
+# Map from @show_events indices to @events indices, eg. (2,0,3).  Gives the
+# order in which we must traverse @events in order to show the @show_events, 
+# eg. ($events[$show_order[0]], $events[$show_order[1]], ...) = @show_events.
+# (Might help to think of it like a hash (0 => 2, 1 => 0, 2 => 3).)
+my @show_order;
+
+# Print out the function totals sorted by these events, eg. (D,C).
+my @sort_events;
+
+# Map from @sort_events indices to @events indices, eg. (3,2).  Same idea as
+# for @show_order.
+my @sort_order;
+
+# Thresholds (percentages), one per sort event.  Unless given via --sort, the
+# primary sort event gets $single_threshold and the rest get 0.  Functions are
+# printed (and files auto-annotated) until every threshold has been reached.
+my @thresholds;
+
+my $default_threshold = 99;    # percent; also quoted in the usage message
+
+my $single_threshold  = $default_threshold;    # replaced by --threshold=X
+
+# If on, automatically annotates all files that are involved in getting over
+# all the threshold counts.
+my $auto_annotate = 0;
+
+# Number of lines to show around each annotated line.
+my $context = 8;
+
+# Directories in which to look for annotation files.
+my @include_dirs = ("");
+
+# Input file name
+my $input_file = undef;
+
+# Version number
+my $version = "@VERSION@";
+
+# Usage message.
+my $usage = <<END
+usage: cg_annotate [options] output-file [source-files]
+
+  options for the user, with defaults in [ ], are:
+    -h --help             show this message
+    -v --version          show version
+    --show=A,B,C          only show figures for events A,B,C [all]
+    --sort=A,B,C          sort columns by events A,B,C [event column order]
+    --threshold=<0--100>  percentage of counts (of primary sort event) we
+                          are interested in [$default_threshold%]
+    --auto=yes|no         annotate all source files containing functions
+                          that helped reach the event count threshold [no]
+    --context=N           print N lines of context before and after
+                          annotated lines [8]
+    -I<d> --include=<d>   add <d> to list of directories to search for 
+                          source files
+
+  cg_annotate is Copyright (C) 2002-2007 Nicholas Nethercote.
+  and licensed under the GNU General Public License, version 2.
+  Bug reports, feedback, admiration, abuse, etc, to: njn\@valgrind.org.
+                                                
+END
+;
+
+# Used in various places of output.
+my $fancy = '-' x 80 . "\n";
+
+#-----------------------------------------------------------------------------
+# Argument and option handling: walk @ARGV and fill in the option globals
+#-----------------------------------------------------------------------------
+sub process_cmd_line() 
+{
+    for my $arg (@ARGV) { 
+
+        # Option handling
+        if ($arg =~ /^-/) {
+
+            # -v / --version: reported via die(), so the script stops here
+            if ($arg =~ /^-v$|^--version$/) {
+                die("cg_annotate-$version\n");
+
+            # --show=A,B,C
+            } elsif ($arg =~ /^--show=(.*)$/) {
+                @show_events = split(/,/, $1);
+
+            # --sort=A,B,C
+            #   Nb: You can specify thresholds individually, eg.
+            #   --sort=A:99,B:95,C:90.  These will override any --threshold
+            #   argument.
+            } elsif ($arg =~ /^--sort=(.*)$/) {
+                @sort_events = split(/,/, $1);
+                my $th_specified = 0;
+                foreach my $i (0 .. scalar @sort_events - 1) {
+                    if ($sort_events[$i] =~ /.*:([\d\.]+)%?$/) {
+                        my $th = $1;
+                        ($th >= 0 && $th <= 100) or die($usage);
+                        $sort_events[$i] =~ s/:.*//;    # keep just the event name
+                        $thresholds[$i] = $th;
+                        $th_specified = 1;
+                    } else {
+                        $thresholds[$i] = 0;    # no threshold given for this event
+                    }
+                }
+                if (not $th_specified) {
+                    @thresholds = ();    # none at all: read_input_file() fills in defaults
+                }
+
+            # --threshold=X (tolerates a trailing '%')
+            } elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) {
+                $single_threshold = $1;
+                ($1 >= 0 && $1 <= 100) or die($usage);
+
+            # --auto=yes|no
+            } elsif ($arg =~ /^--auto=yes$/) {
+                $auto_annotate = 1;
+            } elsif ($arg =~ /^--auto=no$/) {
+                $auto_annotate = 0;
+
+            # --context=N (the pattern admits no '-', so N is never negative)
+            } elsif ($arg =~ /^--context=([\d\.]+)$/) {
+                $context = $1;
+                if ($context < 0) {
+                    die($usage);
+                }
+
+            # We don't handle "-I name" -- there can be no space.
+            } elsif ($arg =~ /^-I$/) {
+                die("Sorry, no space is allowed after a -I flag\n");
+            
+            # -I<d> / --include=<d>, one directory each; -I=<d> for back-compat.
+            } elsif ($arg =~ /^(-I=|-I|--include=)(.*)$/) {
+                my $inc = $2;
+                $inc =~ s|/$||;         # trim trailing '/'
+                push(@include_dirs, "$inc/");
+
+            } else {            # -h and --help fall under this case
+                die($usage);
+            }
+
+        # Argument handling -- annotation file checking and selection.
+        # Stick filenames into a hash for quick 'n easy lookup throughout.
+        } else {
+            if (not defined $input_file) {
+                # First non-option argument is the output file.
+                $input_file = $arg;
+            } else {
+                # Later non-option arguments are source files; only the
+                my $readable = 0;    # include dirs seen so far are searched.
+                foreach my $include_dir (@include_dirs) {
+                    if (-r $include_dir . $arg) {
+                        $readable = 1;
+                    }
+                }
+                $readable or die("File $arg not found in any of: @include_dirs\n");
+                $user_ann_files{$arg} = 1;
+            }
+        }
+    }
+
+    # Must have chosen an input file
+    if (not defined $input_file) {
+        die($usage);
+    }
+}
+
+#-----------------------------------------------------------------------------
+# Reading of input file
+#-----------------------------------------------------------------------------
+sub max ($$) {
+    # Larger of two numbers; the second argument is returned on a tie.
+    my ($lhs, $rhs) = @_;
+    return ($lhs > $rhs) ? $lhs : $rhs;
+}
+
+# Add the counts of the first array into the second, element by element;  any
+# undefined or "." entries in the first array are skipped.  Two tricky things:
+# 1. If $dst->[$idx] is undefined, it defaults to 0 which is what we want; we
+#    turn off warnings to allow this (the original note reports this as about
+#    10% faster than checking for definedness ourselves).
+# 2. We don't add an undefined count or a ".", even though its value is 0,
+#    because that would turn an undef $dst->[$idx] into 0 unnecessarily.
+sub add_array_a_to_b ($$) {
+    my ($src, $dst) = @_;
+    my $last_idx = max(scalar @$src, scalar @$dst) - 1;
+
+    $^W = 0;
+    for my $idx (0 .. $last_idx) {
+        my $count = $src->[$idx];
+        $dst->[$idx] += $count unless (!defined $count || "." eq $count);
+    }
+    $^W = 1;    # note: left switched on, whatever it was on entry
+}
+
+# Split a line of whitespace-separated event counts into a CC array reference.
+# Entries are kept as read;  dies if there are more counts than events.
+sub line_to_CC ($)
+{
+    my @counts = split(/\s+/, $_[0]);
+    die("Line $.: too many event counts\n") if (@counts > @events);
+    return \@counts;
+}
+
+# Parse $input_file: the "desc:", "cmd:" and "events:" header lines, then the
+# fl=/fn=/count/summary lines, filling %fn_totals, %all_ind_CCs and $summary_CC.
+sub read_input_file() 
+{
+    open(INPUTFILE, "< $input_file") 
+         || die "Cannot open $input_file for reading\n";
+
+    # Read "desc:" lines.
+    my $line;
+    while ($line = <INPUTFILE>) {
+        if ($line =~ s/^desc:\s+//) {   # anchored: a "cmd:" line may contain "desc:"
+            $desc .= $line;
+        } else {
+            last;
+        }
+    }
+
+    # Read "cmd:" line (Nb: will already be in $line from "desc:" loop above).
+    ($line =~ s/^cmd:\s+//) or die("Line $.: missing command line\n");
+    $cmd = $line;
+    chomp($cmd);    # Remove newline
+
+    # Read "events:" line.  We make a temporary hash in which the Nth event's
+    # value is N, which is useful for handling --show/--sort options below.
+    $line = <INPUTFILE>;
+    (defined $line && $line =~ s/^events:\s+//) 
+        or die("Line $.: missing events line\n");
+    @events = split(/\s+/, $line);
+    my %events;
+    my $n = 0;
+    foreach my $event (@events) {
+        $events{$event} = $n;
+        $n++
+    }
+
+    # If no --show arg given, default to showing all events in the file.
+    # If --show option is used, check all specified events appeared in the
+    # "events:" line.  Then initialise @show_order.
+    if (@show_events) {
+        foreach my $show_event (@show_events) {
+            (defined $events{$show_event}) or 
+                die("--show event `$show_event' did not appear in input\n");
+        }
+    } else {
+        @show_events = @events;
+    }
+    foreach my $show_event (@show_events) {
+        push(@show_order, $events{$show_event});
+    }
+
+    # Do as for --show, but if no --sort arg given, default to sorting by
+    # column order (ie. first column event is primary sort key, 2nd column is
+    # 2ndary key, etc).
+    if (@sort_events) {
+        foreach my $sort_event (@sort_events) {
+            (defined $events{$sort_event}) or 
+                die("--sort event `$sort_event' did not appear in input\n");
+        }
+    } else {
+        @sort_events = @events;
+    }
+    foreach my $sort_event (@sort_events) {
+        push(@sort_order, $events{$sort_event});
+    }
+
+    # If multiple threshold args weren't given via --sort, stick in the single
+    # threshold (either from --threshold if used, or the default otherwise) for
+    # the primary sort event, and 0% for the rest.
+    if (not @thresholds) {
+        foreach my $e (@sort_order) {
+            push(@thresholds, 0);
+        }
+        $thresholds[0] = $single_threshold;
+    }
+
+    my ($curr_file, $curr_fn, $curr_name);   # set by the fl= and fn= lines
+
+    my $curr_fn_CC = [];
+    my $curr_file_ind_CCs = {};     # hash(line_num => CC)
+
+    # Read body of input file.
+    while (<INPUTFILE>) {
+        s/#.*$//;   # remove comments
+        if (s/^(\d+)\s+//) {
+            my $line_num = $1;
+            my $CC = line_to_CC($_);
+            add_array_a_to_b($CC, $curr_fn_CC);
+            
+            # If curr_file is selected, add CC to curr_file list.  We look for
+            # full filename matches;  or, if auto-annotating, we have to
+            # remember everything -- we won't know until the end what's needed.
+            if ($auto_annotate || defined $user_ann_files{$curr_file}) {
+                my $tmp = $curr_file_ind_CCs->{$line_num};
+                $tmp = [] unless defined $tmp;
+                add_array_a_to_b($CC, $tmp);
+                $curr_file_ind_CCs->{$line_num} = $tmp;
+            }
+
+        } elsif (s/^fn=(.*)$//) {
+            # Commit result from previous function
+            $fn_totals{$curr_name} = $curr_fn_CC if (defined $curr_name);
+
+            # Setup new one
+            $curr_fn = $1;
+            $curr_name = "$curr_file:$curr_fn";
+            $curr_fn_CC = $fn_totals{$curr_name};   # resume earlier totals, if any
+            $curr_fn_CC = [] unless (defined $curr_fn_CC);
+
+        } elsif (s/^fl=(.*)$//) {
+            $all_ind_CCs{$curr_file} = $curr_file_ind_CCs 
+                if (defined $curr_file);
+
+            $curr_file = $1;
+            $curr_file_ind_CCs = $all_ind_CCs{$curr_file};   # resume, if seen before
+            $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs);
+
+        } elsif (s/^\s*$//) {
+            # blank, do nothing
+        
+        } elsif (s/^summary:\s+//) {
+            # Finish up handling final filename/fn_name counts
+            $fn_totals{"$curr_file:$curr_fn"} = $curr_fn_CC 
+                if (defined $curr_file && defined $curr_fn);
+            $all_ind_CCs{$curr_file} = 
+                $curr_file_ind_CCs if (defined $curr_file);
+
+            $summary_CC = line_to_CC($_);
+            (scalar(@$summary_CC) == @events) 
+                or die("Line $.: summary event and total event mismatch\n");
+
+        } else {
+            warn("WARNING: line $. malformed, ignoring\n");
+        }
+    }
+
+    # Check if summary line was present
+    if (not defined $summary_CC) {
+        die("missing final summary line, aborting\n");
+    }
+
+    close(INPUTFILE);
+}
+
+#-----------------------------------------------------------------------------
+# Print options used
+#-----------------------------------------------------------------------------
+sub print_options ()
+{
+    # Echo the run's settings as a block of "Label: value" lines, ended by a
+    # blank line.
+    print($fancy);
+    print($desc);
+    print("Command:          $cmd\n",
+          "Data file:        $input_file\n",
+          "Events recorded:  @events\n",
+          "Events shown:     @show_events\n",
+          "Event sort order: @sort_events\n",
+          "Thresholds:       @thresholds\n");
+
+    # Include directories, minus the "" entry that is always first.  The
+    # label line is printed even when no directory was given.
+    my @dirs = @include_dirs[1 .. $#include_dirs];
+    push(@dirs, "") unless (@dirs);
+    my $first_dir = shift(@dirs);
+    print("Include dirs:     $first_dir\n");
+    print("                  $_\n") foreach (@dirs);
+
+    # Files named on the command line, laid out the same way.
+    my @files = keys %user_ann_files;
+    push(@files, "") unless (@files);
+    my $first_file = shift(@files);
+    print("User annotated:   $first_file\n");
+    print("                  $_\n") foreach (@files);
+
+    my $auto_state = $auto_annotate ? "on" : "off";
+    print("Auto-annotation:  $auto_state\n");
+    print("\n");
+}
+
+#-----------------------------------------------------------------------------
+# Print summary and sorted function totals
+#-----------------------------------------------------------------------------
+sub mycmp ($$) 
+{
+    my ($lhs_CC, $rhs_CC) = @_;
+
+    # Walk the sort events in priority order (eg. 3,2);  the first event whose
+    # counts differ decides.  Larger counts sort first;  a missing count is
+    # treated as -1, so it sorts after any non-negative count.
+    for my $idx (@sort_order) {
+        my $lhs = $lhs_CC->[$idx];
+        my $rhs = $rhs_CC->[$idx];
+        $lhs = -1 if (!defined $lhs);
+        $rhs = -1 if (!defined $rhs);
+
+        my $order = ($rhs <=> $lhs);
+        return $order if ($order != 0);
+    }
+
+    # All sort events compared equal.
+    return 0;
+}
+
+sub commify ($) {
+    my $text = $_[0];   # comma-separate the leading digit run into groups of 3
+    while ($text =~ s/^(\d+)(\d{3})/$1,$2/) { }
+    return $text;
+}
+
+# Because the counts can get very big, and we don't want to waste screen space
+# and make lines too long, we compute exactly how wide each column needs to be
+# by finding the widest entry for each one.
+sub compute_CC_col_widths (@) 
+{
+    my @CCs = @_;
+
+    # Start from the event-name widths, so that a column is never narrower
+    # than its heading.
+    my $widths = [ map { length($_) } @events ];
+
+    # Widen each column to fit its longest count.  Positions in $widths
+    # correspond to positions in each CC.
+    for my $CC (@CCs) {
+        for my $col (0 .. $#$CC) {
+            my $count = $CC->[$col];
+            next unless (defined $count);
+
+            # Printed width = characters in the count plus the commas to come.
+            my $digits = length $count;
+            my $width  = $digits + int(($digits - 1) / 3);
+            $widths->[$col] = max($widths->[$col], $width);
+        }
+    }
+
+    return $widths;
+}
+
+# Print the CC with each column's size dictated by $CC_col_widths.
+sub print_CC ($$) 
+{
+    my ($counts, $widths) = @_;
+    for my $col (@show_order) {
+        # A missing count prints as ".";  cells are right-aligned.
+        my $cell = defined($counts->[$col]) ? commify($counts->[$col]) : ".";
+        my $pad  = $widths->[$col] - length($cell);
+        print(' ' x $pad, $cell, ' ');
+    }
+}
+
+sub print_events ($)
+{
+    my ($widths) = @_;
+
+    # One right-aligned heading per shown event, each followed by a space,
+    # using the same column order as print_CC().
+    for my $col (@show_order) {
+        my $name = $events[$col];
+        my $pad  = $widths->[$col] - length($name);
+        print(' ' x $pad, "$name ");
+    }
+}
+
+# Prints summary and function totals (with separate column widths, so that
+# function names aren't pushed over unnecessarily by huge summary figures).
+# Also returns a hash reference whose keys are all the files that are involved
+# in getting the events count above the thresholds (ie. the interesting ones).
+sub print_summary_and_fn_totals ()
+{
+    my @fn_fullnames = keys   %fn_totals;
+
+    # Work out the size of each column for printing (summary and functions
+    # separately).
+    my $summary_CC_col_widths = compute_CC_col_widths($summary_CC);
+    my      $fn_CC_col_widths = compute_CC_col_widths(values %fn_totals);
+
+    # Header and counts for summary
+    print($fancy);
+    print_events($summary_CC_col_widths);
+    print("\n");
+    print($fancy);
+    print_CC($summary_CC, $summary_CC_col_widths);
+    print(" PROGRAM TOTALS\n");
+    print("\n");
+
+    # Header for functions
+    print($fancy);
+    print_events($fn_CC_col_widths);
+    print(" file:function\n");
+    print($fancy);
+
+    # Sort function names into order dictated by --sort option.
+    @fn_fullnames = sort {
+        mycmp($fn_totals{$a}, $fn_totals{$b})
+    } @fn_fullnames;
+
+    # Assertion
+    (scalar @sort_order == scalar @thresholds) or 
+        die("sort_order length != thresholds length:\n",
+            "  @sort_order\n  @thresholds\n");
+
+    my $threshold_files       = {};
+    # @curr_totals has the same shape as @sort_order and @thresholds
+    my @curr_totals = ();
+    foreach my $e (@thresholds) {
+        push(@curr_totals, 0);
+    }
+
+    # Print functions, stopping when the threshold has been reached.
+    foreach my $fn_name (@fn_fullnames) {
+
+        # Stop when we've reached all the thresholds
+        my $reached_all_thresholds = 1;
+        foreach my $i (0 .. scalar @thresholds - 1) {
+            my $total = $summary_CC->[$sort_order[$i]];   # may be 0: never divide by it
+            my $prop  = ($total != 0 ? $curr_totals[$i] * 100 / $total : 0);
+            $reached_all_thresholds &&= ($prop >= $thresholds[$i]);
+        }
+        last if $reached_all_thresholds;
+
+        # Print function results
+        my $fn_CC = $fn_totals{$fn_name};
+        print_CC($fn_CC, $fn_CC_col_widths);
+        print(" $fn_name\n");
+
+        # Update the threshold counts
+        my $filename = $fn_name;
+        $filename =~ s/:.+$//;    # remove function name
+        $threshold_files->{$filename} = 1;
+        foreach my $i (0 .. scalar @sort_order - 1) {
+            $curr_totals[$i] += $fn_CC->[$sort_order[$i]] 
+                if (defined $fn_CC->[$sort_order[$i]]);
+        }
+    }
+    print("\n");
+
+    return $threshold_files;
+}
+
+#-----------------------------------------------------------------------------
+# Annotate selected files
+#-----------------------------------------------------------------------------
+
+# Issue a warning that the source file is more recent than the input file. 
+sub warning_on_src_more_recent_than_inputfile ($)
+{
+    my ($src_file) = @_;
+
+    # The banner names both files;  $input_file is the file-level variable.
+    # The text ends with one blank line.
+    print(<<END);
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Source file '$src_file' is more recent than input file '$input_file'.
+@ Annotations may not be correct.
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+END
+}
+
+# If there is information about lines not in the file, issue a warning
+# explaining possible causes.  ($excess_line_nums is accepted but not used.)
+sub warning_on_nonexistent_lines ($$$)
+{
+    my ($src_more_recent_than_inputfile, $src_file, $excess_line_nums) = @_;
+    my $cause_and_solution;
+
+    if ($src_more_recent_than_inputfile) {
+        $cause_and_solution = <<END
+@@ cause:    '$src_file' has changed since information was gathered.
+@@           If so, a warning will have already been issued about this.
+@@ solution: Recompile program and rerun under "valgrind --cachesim=yes" to 
+@@           gather new information.
+END
+    # Header files get their own explanation (nothing is suppressed here)
+    } elsif ($src_file =~ /\.h$/) {
+        $cause_and_solution = <<END
+@@ cause:    bug in the Valgrind's debug info reader that screws up with .h
+@@           files sometimes
+@@ solution: none, sorry
+END
+    } else {
+        $cause_and_solution = <<END
+@@ cause:    not sure, sorry
+END
+    }
+
+    my $warning = <<END
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@
+@@ Information recorded about lines past the end of '$src_file'.
+@@
+@@ Probable cause and solution:
+$cause_and_solution@@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+END
+;
+    print($warning);    # the text above is built with interpolation, then emitted once
+}
+
+# Print annotated source for each user-selected file and, when auto-annotating,
+# each file in %$threshold_files; then list auto-selected files that could not
+# be found and the percentage of each event covered by the annotated lines.
+sub annotate_ann_files($)
+{
+    my ($threshold_files) = @_;
+
+    my %all_ann_files;
+    my @unfound_auto_annotate_files;
+    my $printed_totals_CC = [];
+
+    # If auto-annotating, add interesting files (but not "???")
+    if ($auto_annotate) {
+        delete $threshold_files->{"???"};
+        %all_ann_files = (%user_ann_files, %$threshold_files);
+    } else {
+        %all_ann_files = %user_ann_files;
+    }
+
+    # Track if we did any annotations.
+    my $did_annotations = 0;
+
+    LOOP:
+    foreach my $src_file (keys %all_ann_files) {
+        my $opened_file = "";
+        my $full_file_name = "";
+        # Nb: include_dirs already includes "", so it works in the case
+        # where the filename has the full path.
+        foreach my $include_dir (@include_dirs) {
+            my $try_name = $include_dir . $src_file;
+            if (open(INPUTFILE, "<", $try_name)) {   # 3-arg: name taken literally
+                $opened_file    = $try_name;
+                $full_file_name = ($include_dir eq "" ? $src_file
+                                   : "$include_dir + $src_file");
+                last;
+            }
+        }
+
+        if (not $opened_file) {
+            # Failed to open the file.  If chosen on the command line, die.
+            # If arose from auto-annotation, print a little message.
+            if (defined $user_ann_files{$src_file}) {
+                die("File $src_file not opened in any of: @include_dirs\n");
+            } else {
+                push(@unfound_auto_annotate_files, $src_file);
+            }
+
+        } else {
+            # File header (distinguish between user- and auto-selected files).
+            print("$fancy");
+            my $ann_type =
+                (defined $user_ann_files{$src_file} ? "User" : "Auto");
+            print("-- $ann_type-annotated source: $full_file_name\n");
+            print("$fancy");
+
+            # Get file's CCs
+            my $src_file_CCs = $all_ind_CCs{$src_file};
+            if (!defined $src_file_CCs) {
+                print("  No information has been collected for $src_file\n\n");
+                close(INPUTFILE);    # don't leave the handle open
+                next LOOP;
+            }
+            $did_annotations = 1;
+
+            # Numeric, not lexicographic sort!
+            my @line_nums = sort {$a <=> $b} keys %$src_file_CCs;
+
+            # If $src_file more recent than cachegrind.out, issue warning
+            my $src_more_recent_than_inputfile = 0;
+            if ((stat $opened_file)[9] > (stat $input_file)[9]) {
+                $src_more_recent_than_inputfile = 1;
+                warning_on_src_more_recent_than_inputfile($src_file);
+            }
+
+            # Work out the size of each column for printing
+            my $CC_col_widths = compute_CC_col_widths(values %$src_file_CCs);
+
+            # Events header
+            print_events($CC_col_widths);
+            print("\n\n");
+
+            # Shift out 0 if it's in the line numbers (from unknown entries,
+            # likely due to bugs in Valgrind's stabs debug info reader)
+            shift(@line_nums) if (@line_nums && 0 == $line_nums[0]);
+
+            # Finds interesting line ranges -- all lines with a CC, and all
+            # lines within $context lines of a line with a CC.
+            my $n = @line_nums;
+            my @pairs;
+            for (my $i = 0; $i < $n; $i++) {
+                push(@pairs, $line_nums[$i] - $context);   # lower marker
+                while ($i < $n-1 &&
+                       $line_nums[$i] + 2*$context >= $line_nums[$i+1]) {
+                    $i++;
+                }
+                push(@pairs, $line_nums[$i] + $context);   # upper marker
+            }
+
+            # Annotate chosen lines, tracking total counts of lines printed
+            $pairs[0] = 1 if (@pairs && $pairs[0] < 1);
+            while (@pairs) {
+                my $low  = shift @pairs;
+                my $high = shift @pairs;
+                while ($. < $low-1) {
+                    my $tmp = <INPUTFILE>;
+                    last unless (defined $tmp);     # hack to detect EOF
+                }
+                my $src_line;
+                # Print line number, unless start of file
+                print("-- line $low " . '-' x 40 . "\n") if ($low != 1);
+                while (($. < $high) && defined($src_line = <INPUTFILE>)) {
+                    if (defined $line_nums[0] && $. == $line_nums[0]) {
+                        print_CC($src_file_CCs->{$.}, $CC_col_widths);
+                        add_array_a_to_b($src_file_CCs->{$.},
+                                         $printed_totals_CC);
+                        shift(@line_nums);
+
+                    } else {
+                        print_CC( [], $CC_col_widths);
+                    }
+
+                    print(" $src_line");
+                }
+                # Print line number, unless EOF (undef, not merely a false line)
+                if (defined $src_line) {
+                    print("-- line $high " . '-' x 40 . "\n");
+                } else {
+                    last;
+                }
+            }
+
+            # If there was info on lines past the end of the file...
+            if (@line_nums) {
+                foreach my $line_num (@line_nums) {
+                    print_CC($src_file_CCs->{$line_num}, $CC_col_widths);
+                    print(" <bogus line $line_num>\n");
+                }
+                print("\n");
+                warning_on_nonexistent_lines($src_more_recent_than_inputfile,
+                                             $src_file, \@line_nums);
+            }
+            print("\n");
+
+            # Print summary of counts attributed to file but not to any
+            # particular line (due to incomplete debug info).
+            if ($src_file_CCs->{0}) {
+                print_CC($src_file_CCs->{0}, $CC_col_widths);
+                print(" <counts for unidentified lines in $src_file>\n\n");
+            }
+
+            close(INPUTFILE);
+        }
+    }
+
+    # Print list of unfound auto-annotate selected files.
+    if (@unfound_auto_annotate_files) {
+        print("$fancy");
+        print("The following files chosen for auto-annotation could not be found:\n");
+        print($fancy);
+        foreach my $f (@unfound_auto_annotate_files) {
+            print("  $f\n");
+        }
+        print("\n");
+    }
+
+    # If we did any annotating, print what proportion of events were covered by
+    # annotated lines above.
+    if ($did_annotations) {
+        my $percent_printed_CC;
+        for (my $i = 0; $i < @$summary_CC; $i++) {
+            my $total = $summary_CC->[$i];
+            $percent_printed_CC->[$i] = sprintf("%.0f",    # 0 total => 0%
+                $total ? ($printed_totals_CC->[$i] || 0) / $total * 100 : 0);
+        }
+        my $pp_CC_col_widths = compute_CC_col_widths($percent_printed_CC);
+        print($fancy);
+        print_events($pp_CC_col_widths);
+        print("\n");
+        print($fancy);
+        print_CC($percent_printed_CC, $pp_CC_col_widths);
+        print(" percentage of events annotated\n\n");
+    }
+}
+
+#----------------------------------------------------------------------------
+# "main()": top-level driver; the calls below run in this order.
+#----------------------------------------------------------------------------
+process_cmd_line();
+read_input_file();
+print_options();
+my $threshold_files = print_summary_and_fn_totals();  # hash ref keyed by file
+annotate_ann_files($threshold_files);
+
+##--------------------------------------------------------------------##
+##--- end                                           cg_annotate.in ---##
+##--------------------------------------------------------------------##
+
+
diff --git a/cachegrind/cg_arch.h b/cachegrind/cg_arch.h
new file mode 100644
index 0000000..9090908
--- /dev/null
+++ b/cachegrind/cg_arch.h
@@ -0,0 +1,50 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Arch-specific declarations.                        cg_arch.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __CG_ARCH_H
+#define __CG_ARCH_H
+
+// Configuration of one simulated cache (see VG_(configure_caches) below).
+typedef struct {
+   int size;       // bytes
+   int assoc;      // presumably the associativity (ways) -- TODO confirm
+   int line_size;  // bytes
+} cache_t;
+
+// Gives the configuration of I1, D1 and L2 caches.  They get overridden
+// by any cache configurations specified on the command line.
+void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
+                           Bool all_caches_clo_defined);
+
+#endif   // __CG_ARCH_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg_branchpred.c b/cachegrind/cg_branchpred.c
new file mode 100644
index 0000000..e19a3d3
--- /dev/null
+++ b/cachegrind/cg_branchpred.c
@@ -0,0 +1,154 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Branch predictor simulation                  cg_branchpred.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+
+/* This file contains the actual branch predictor simulator and its
+   associated state.  As with cg_sim.c it is #included directly into
+   cg_main.c.  It provides:
+
+   - a taken/not-taken predictor for conditional branches
+   - a branch target address predictor for indirect branches
+
+   Function return-address prediction is not modelled, on the basis
+   that return stack predictors almost always predict correctly, and
+   also that it is difficult for Valgrind to robustly identify
+   function calls and returns.
+*/
+
+/* How many bits at the bottom of an instruction address are
+   guaranteed to be zero? */
+#if defined(VGA_ppc32) || defined(VGA_ppc64)
+#  define N_IADDR_LO_ZERO_BITS 2
+#elif defined(VGA_x86) || defined(VGA_amd64)
+#  define N_IADDR_LO_ZERO_BITS 0
+#else
+#  error "Unsupported architecture"
+#endif
+
+
+/* Get a taken/not-taken prediction for the instruction (presumably a
+   conditional branch) at instr_addr.  Once that's done, update the
+   predictor state based on whether or not it was actually taken, as
+   indicated by 'taken'.  Finally, return 1 for a mispredict and 0 for
+   a successful predict.
+
+   The predictor is an array of 16k (== 2^14) 2-bit saturating
+   counters.  Given the address of the branch instruction, the array
+   index to use is computed both from the low order bits of the branch
+   instruction's address, and the global history - that is, from the
+   taken/not-taken behaviour of the most recent few branches.  This
+   makes the predictor able to correlate this branch's behaviour with
+   that of other branches. 
+
+   TODO: use predictor written by someone who understands this stuff.
+   Perhaps it would be better to move to a standard GShare predictor
+   and/or tournament predictor.
+*/
+/* The index is composed of N_HIST_BITS bits at the top and N_IADD_BITS
+   bits at the bottom.  These numbers were chosen somewhat arbitrarily, but
+   note that making N_IADD_BITS too small (eg 4) can cause large amounts of
+   aliasing, and hence misprediction, particularly if the history bits
+   are mostly unchanging. */
+#define N_HIST_BITS 7
+#define N_IADD_BITS 7
+
+#define N_BITS     (N_HIST_BITS + N_IADD_BITS)
+#define N_COUNTERS (1 << N_BITS)
+
+static UWord shift_register = 0;   /* Contains global history */
+static UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */
+
+
+static ULong do_cond_branch_predict ( Addr instr_addr, Word takenW )
+{
+   /* Counter slot = recent global history (upper N_HIST_BITS) combined
+      with the low N_IADD_BITS of the instruction address. */
+   const UWord history_mask = (1 << N_HIST_BITS) - 1;
+   const UWord address_mask = (1 << N_IADD_BITS) - 1;
+   const UWord history      = shift_register & history_mask;
+   const UWord address      = (instr_addr >> N_IADDR_LO_ZERO_BITS)
+                              & address_mask;
+   UWord       slot;
+   Bool        guessed_taken, was_taken;
+
+   tl_assert(history <= history_mask);
+   tl_assert(address <= address_mask);
+   slot = (history << N_IADD_BITS) | address;
+   tl_assert(slot < N_COUNTERS);
+   if (0) VG_(printf)("index = %d\n", (Int)slot);
+
+   /* A counter value of 2 or 3 means "predict taken". */
+   tl_assert(takenW <= 1);
+   guessed_taken = counters[slot] >= 2;
+   was_taken     = takenW > 0;
+
+   /* Shift the real outcome into the global history. */
+   shift_register = (shift_register << 1) | (was_taken ? 1 : 0);
+
+   /* Move the 2-bit counter towards the real outcome, saturating at
+      0 and 3. */
+   if (was_taken && counters[slot] < 3)
+      counters[slot]++;
+   else if (!was_taken && counters[slot] > 0)
+      counters[slot]--;
+
+   tl_assert(counters[slot] <= 3);
+
+   /* Both flags were assigned from comparisons, so each is 0 or 1 and
+      inequality means the prediction was wrong. */
+   return (guessed_taken != was_taken) ? 1 : 0;
+}
+
+
+/* A very simple indirect branch predictor.  Use the branch's address
+   to index a table which records the previous target address for this
+   branch (or whatever aliased with it) and use that as the
+   prediction. */
+#define N_BTAC_BITS 9
+#define N_BTAC      (1 << N_BTAC_BITS)
+static Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */
+
+static ULong do_ind_branch_predict ( Addr instr_addr, Addr actual )
+{
+   /* Predict the target last recorded in this slot; 1 == mispredict. */
+   const UWord slot = (instr_addr >> N_IADDR_LO_ZERO_BITS)
+                      & ((1 << N_BTAC_BITS) - 1);
+   Addr predicted;
+   tl_assert(slot < N_BTAC);
+   predicted  = btac[slot];
+   btac[slot] = actual;
+   return (predicted != actual) ? 1 : 0;
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                          cg_branchpred.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
new file mode 100644
index 0000000..6d7ce87
--- /dev/null
+++ b/cachegrind/cg_main.c
@@ -0,0 +1,1767 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Cachegrind: everything but the simulation itself.            ---*/
+/*---                                                    cg_main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_vki.h"
+#include "pub_tool_debuginfo.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcfile.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_libcproc.h"
+#include "pub_tool_machine.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_options.h"
+#include "pub_tool_oset.h"
+#include "pub_tool_tooliface.h"
+#include "pub_tool_xarray.h"
+#include "pub_tool_clientstate.h"
+#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
+
+#include "cg_arch.h"
+#include "cg_sim.c"
+#include "cg_branchpred.c"
+
+/*------------------------------------------------------------*/
+/*--- Constants                                            ---*/
+/*------------------------------------------------------------*/
+
+/* Set to 1 for very verbose debugging */
+#define DEBUG_CG 0
+
+#define MIN_LINE_SIZE         16
+#define FILE_LEN              VKI_PATH_MAX
+#define FN_LEN                256
+
+/*------------------------------------------------------------*/
+/*--- Options                                              ---*/
+/*------------------------------------------------------------*/
+
+static Bool  clo_cache_sim  = True;  /* do cache simulation? */
+static Bool  clo_branch_sim = False; /* do branch simulation? */
+static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
+
+/*------------------------------------------------------------*/
+/*--- Types and Data Structures                            ---*/
+/*------------------------------------------------------------*/
+
+typedef
+   struct {
+      ULong a;  /* total # memory accesses of this kind */
+      ULong m1; /* misses in the first level cache */
+      ULong m2; /* misses in the second level cache */
+   }
+   CacheCC;
+
+typedef
+   struct {
+      ULong b;  /* total # branches of this kind */
+      ULong mp; /* number of branches mispredicted */
+   }
+   BranchCC;
+
+//------------------------------------------------------------
+// Primary data structure #1: CC table
+// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
+// - an ordered set of CCs.  CC indexing done by file/function/line (as
+//   determined from the instrAddr).
+// - Traversed for dumping stats at end in file/func/line hierarchy.
+
+typedef struct {
+   Char* file;   // file name; "???" when no file/line info was found
+   Char* fn;     // function name; "???" when none was found
+   Int   line;   // line number; 0 when no file/line info was found
+}
+CodeLoc;
+
+typedef struct {
+   CodeLoc  loc; /* Source location that these counts pertain to */
+   CacheCC  Ir;  /* Insn read counts */
+   CacheCC  Dr;  /* Data read counts */
+   CacheCC  Dw;  /* Data write/modify counts */
+   BranchCC Bc;  /* Conditional branch counts */
+   BranchCC Bi;  /* Indirect branch counts */
+} LineCC;
+
+// Comparison: file name first, then function name, then line number.
+static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
+{
+   const CodeLoc* key  = (const CodeLoc*)vloc;
+   const CodeLoc* elem = &((const LineCC*)vcc)->loc;
+   Word order;
+   // Files differ: that alone decides the ordering.
+   order = VG_(strcmp)(key->file, elem->file);
+   if (order != 0) return order;
+
+   // Same file: fall back to the function name.
+   order = VG_(strcmp)(key->fn, elem->fn);
+   if (order != 0) return order;
+
+   // Same file and function: order by line.
+   return key->line - elem->line;
+}
+
+static OSet* CC_table;
+
+//------------------------------------------------------------
+// Primary data structure #2: InstrInfo table
+// - Holds the cached info about each instr that is used for simulation.
+// - table(SB_start_addr, list(InstrInfo))
+// - For each SB, each InstrInfo in the list holds info about the
+//   instruction (instrLen, instrAddr, etc), plus a pointer to its line
+//   CC.  This node is what's passed to the simulation function.
+// - When SBs are discarded the relevant list(instr_details) is freed.
+
+typedef struct _InstrInfo InstrInfo;
+struct _InstrInfo {
+   Addr    instr_addr;
+   UChar   instr_len;
+   LineCC* parent;         // parent line-CC
+};
+
+typedef struct _SB_info SB_info;
+struct _SB_info {
+   Addr      SB_addr;      // key;  MUST BE FIRST
+   Int       n_instrs;
+   InstrInfo instrs[0];
+};
+
+static OSet* instrInfoTable;
+
+//------------------------------------------------------------
+// Secondary data structure: string table
+// - holds strings, avoiding dups
+// - used for filenames and function names, each of which will be
+//   pointed to by one or more CCs.
+// - it also allows equality checks just by pointer comparison, which
+//   is good when printing the output file at the end.
+
+static OSet* stringTable;
+
+//------------------------------------------------------------
+// Stats
+static Int  distinct_files      = 0;
+static Int  distinct_fns        = 0;
+static Int  distinct_lines      = 0;
+static Int  distinct_instrs     = 0;
+
+static Int  full_debugs         = 0;
+static Int  file_line_debugs    = 0;
+static Int  fn_debugs           = 0;
+static Int  no_debugs           = 0;
+
+/*------------------------------------------------------------*/
+/*--- String table operations                              ---*/
+/*------------------------------------------------------------*/
+
+static Word stringCmp( const void* key, const void* elem )
+{
+   return VG_(strcmp)(*(Char**)key, *(Char**)elem);  // both point at a Char*
+}
+
+// Return the string table's own copy of 's', first adding a duplicate of
+// 's' to the table if no equal string is stored there yet.
+static Char* get_perm_string(Char* s)
+{
+   Char** found = VG_(OSetGen_Lookup)(stringTable, &s);
+   Char** fresh;
+   if (found)
+      return *found;
+
+   fresh  = VG_(OSetGen_AllocNode)(stringTable, sizeof(Char*));
+   *fresh = VG_(strdup)("cg.main.gps.1", s);
+   VG_(OSetGen_Insert)(stringTable, fresh);
+   return *fresh;
+}
+
+/*------------------------------------------------------------*/
+/*--- CC table operations                                  ---*/
+/*------------------------------------------------------------*/
+
+static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
+                           Char fn[FN_LEN], Int* line)
+{
+   // Fills 'file', 'fn' and '*line' for instr_addr.  Whatever the debug
+   // info cannot supply becomes "???" (names) or 0 (line).  Exactly one of
+   // the four *_debugs counters is bumped per call.
+   Char dirbuf[FILE_LEN];
+   Bool have_dir;
+   Bool have_loc;
+   Bool have_fn;
+
+   have_loc = VG_(get_filename_linenum)(instr_addr, file, FILE_LEN,
+                                        dirbuf, FILE_LEN, &have_dir, line);
+   have_fn  = VG_(get_fnname)(instr_addr, fn, FN_LEN);
+
+   if (!have_loc) {
+      VG_(strcpy)(file, "???");
+      *line = 0;
+   }
+   if (!have_fn) {
+      VG_(strcpy)(fn, "???");
+   }
+
+   if (have_dir) {
+      // Build "dir/file" in dirbuf, then copy it back; the +1 is the '/'.
+      tl_assert(VG_(strlen)(dirbuf) + VG_(strlen)(file) + 1 < FILE_LEN);
+      VG_(strcat)(dirbuf, "/");
+      VG_(strcat)(dirbuf, file);
+      VG_(strcpy)(file, dirbuf);
+   }
+
+   // Classify the lookup by which pieces of debug info were available.
+   if      ( have_loc &&  have_fn) full_debugs++;
+   else if ( have_loc && !have_fn) file_line_debugs++;
+   else if (!have_loc &&  have_fn) fn_debugs++;
+   else                            no_debugs++;
+}
+
+// Map origAddr to its source location (file, fn, line) and return the
+// LineCC stored for that location in CC_table, inserting a new one with
+// all counts zero if the location has not been seen before.
+static LineCC* get_lineCC(Addr origAddr)
+{
+   Char    file_buf[FILE_LEN];
+   Char    fn_buf[FN_LEN];
+   Int     line_no;
+   CodeLoc key;
+   LineCC* cc;
+
+   get_debug_info(origAddr, file_buf, fn_buf, &line_no);
+
+   // The lookup key simply points at the local buffers.
+   key.file = file_buf;
+   key.fn   = fn_buf;
+   key.line = line_no;
+
+   cc = VG_(OSetGen_Lookup)(CC_table, &key);
+   if (cc) {
+      return cc;
+   }
+
+   // New location.  The node keeps string-table copies of the names, not
+   // pointers into the local buffers above.
+   cc           = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
+   cc->loc.file = get_perm_string(key.file);
+   cc->loc.fn   = get_perm_string(key.fn);
+   cc->loc.line = key.line;
+
+   // Every counter starts at zero.
+   cc->Ir.a = cc->Ir.m1 = cc->Ir.m2 = 0;
+   cc->Dr.a = cc->Dr.m1 = cc->Dr.m2 = 0;
+   cc->Dw.a = cc->Dw.m1 = cc->Dw.m2 = 0;
+   cc->Bc.b = cc->Bc.mp = 0;
+   cc->Bi.b = cc->Bi.mp = 0;
+
+   VG_(OSetGen_Insert)(CC_table, cc);
+   return cc;
+}
+
+/*------------------------------------------------------------*/
+/*--- Cache simulation functions                           ---*/
+/*------------------------------------------------------------*/
+
+static VG_REGPARM(1)
+void log_1I_0D_cache_access(InstrInfo* n)
+{
+   //VG_(printf)("1I_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
+   //             n, n->instr_addr, n->instr_len);
+   LineCC* cc = n->parent;   // line CC charged for this instruction
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc->Ir.m1, &cc->Ir.m2);
+   cc->Ir.a++;
+}
+
+static VG_REGPARM(2)
+void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2)
+{
+   //VG_(printf)("2I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
+   //            "        CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
+   //            n,  n->instr_addr,  n->instr_len,
+   //            n2, n2->instr_addr, n2->instr_len);
+   LineCC* cc1 = n->parent;    // line CC of the first instruction
+   LineCC* cc2 = n2->parent;   // line CC of the second instruction
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc1->Ir.m1, &cc1->Ir.m2);
+   cc1->Ir.a++;
+   cachesim_I1_doref(n2->instr_addr, n2->instr_len, &cc2->Ir.m1, &cc2->Ir.m2);
+   cc2->Ir.a++;
+}
+
+static VG_REGPARM(3)
+void log_3I_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
+{
+   //VG_(printf)("3I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
+   //            "        CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
+   //            "        CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
+   //            n,  n->instr_addr,  n->instr_len,
+   //            n2, n2->instr_addr, n2->instr_len,
+   //            n3, n3->instr_addr, n3->instr_len);
+   LineCC* cc1 = n->parent;    // line CC of the first instruction
+   LineCC* cc2 = n2->parent;   // line CC of the second instruction
+   LineCC* cc3 = n3->parent;   // line CC of the third instruction
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc1->Ir.m1, &cc1->Ir.m2);
+   cc1->Ir.a++;
+   cachesim_I1_doref(n2->instr_addr, n2->instr_len, &cc2->Ir.m1, &cc2->Ir.m2);
+   cc2->Ir.a++;
+   cachesim_I1_doref(n3->instr_addr, n3->instr_len, &cc3->Ir.m1, &cc3->Ir.m2);
+   cc3->Ir.a++;
+}
+
+static VG_REGPARM(3)
+void log_1I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
+{
+   //VG_(printf)("1I_1Dr:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
+   //            "                               daddr=0x%010lx,  dsize=%lu\n",
+   //            n, n->instr_addr, n->instr_len, data_addr, data_size);
+   LineCC* cc = n->parent;   // line CC charged for both references
+
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc->Ir.m1, &cc->Ir.m2);
+   cc->Ir.a++;
+
+   cachesim_D1_doref(data_addr, data_size, &cc->Dr.m1, &cc->Dr.m2);
+   cc->Dr.a++;
+}
+
+static VG_REGPARM(3)
+void log_1I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
+{
+   //VG_(printf)("1I_1Dw:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
+   //            "                               daddr=0x%010lx,  dsize=%lu\n",
+   //            n, n->instr_addr, n->instr_len, data_addr, data_size);
+   LineCC* cc = n->parent;   // line CC charged for both references
+
+   cachesim_I1_doref(n->instr_addr, n->instr_len, &cc->Ir.m1, &cc->Ir.m2);
+   cc->Ir.a++;
+
+   cachesim_D1_doref(data_addr, data_size, &cc->Dw.m1, &cc->Dw.m2);
+   cc->Dw.a++;
+}
+
+static VG_REGPARM(3)
+void log_0I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
+{
+   //VG_(printf)("0I_1Dr:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
+   //            n, data_addr, data_size);
+   LineCC* cc = n->parent;   // line CC charged for this data read
+   cachesim_D1_doref(data_addr, data_size, &cc->Dr.m1, &cc->Dr.m2);
+   cc->Dr.a++;
+}
+
+static VG_REGPARM(3)
+void log_0I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
+{
+   //VG_(printf)("0I_1Dw:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
+   //            n, data_addr, data_size);
+   LineCC* cc = n->parent;   // line CC charged for this data write
+   cachesim_D1_doref(data_addr, data_size, &cc->Dw.m1, &cc->Dw.m2);
+   cc->Dw.a++;
+}
+
+/* For branches, we consult two different predictors, one which
+   predicts taken/untaken for conditional branches, and the other
+   which predicts the branch target address for indirect branches
+   (jump-to-register style ones). */
+
+static VG_REGPARM(2)
+void log_cond_branch(InstrInfo* n, Word taken)
+{
+   //VG_(printf)("cbrnch:  CCaddr=0x%010lx,  taken=0x%010lx\n",
+   //             n, taken);
+   LineCC* cc = n->parent;   // line CC charged for this branch
+   cc->Bc.b++;
+   cc->Bc.mp += (1 & do_cond_branch_predict(n->instr_addr, taken));
+}
+
+static VG_REGPARM(2)
+void log_ind_branch(InstrInfo* n, UWord actual_dst)
+{
+   //VG_(printf)("ibrnch:  CCaddr=0x%010lx,    dst=0x%010lx\n",
+   //             n, actual_dst);
+   LineCC* cc = n->parent;   // line CC charged for this branch
+   cc->Bi.b++;
+   cc->Bi.mp += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Instrumentation types and structures                 ---*/
+/*------------------------------------------------------------*/
+
+/* Maintain an ordered list of memory events which are outstanding, in
+   the sense that no IR has yet been generated to do the relevant
+   helper calls.  The BB is scanned top to bottom and memory events
+   are added to the end of the list, merging with the most recent
+   notified event where possible (Dw immediately following Dr and
+   having the same size and EA can be merged).
+
+   This merging is done so that for architectures which have
+   load-op-store instructions (x86, amd64), the insn is treated as if
+   it makes just one memory reference (a modify), rather than two (a
+   read followed by a write at the same address).
+
+   At various points the list will need to be flushed, that is, IR
+   generated from it.  That must happen before any possible exit from
+   the block (the end, or an IRStmt_Exit).  Flushing also takes place
+   when there is no space to add a new event.
+
+   If we require the simulation statistics to be up to date with
+   respect to possible memory exceptions, then the list would have to
+   be flushed before each memory reference.  That would however lose
+   performance by inhibiting event-merging during flushing.
+
+   Flushing the list consists of walking it start to end and emitting
+   instrumentation IR for each event, in the order in which they
+   appear.  It may be possible to emit a single call for two adjacent
+   events in order to reduce the number of helper function calls made.
+   For example, it could well be profitable to handle two adjacent Ir
+   events with a single helper call.  */
+
+typedef
+   IRExpr 
+   IRAtom;
+
+typedef 
+   enum { 
+      Ev_Ir,  // Instruction read
+      Ev_Dr,  // Data read
+      Ev_Dw,  // Data write
+      Ev_Dm,  // Data modify (read then write)
+      Ev_Bc,  // branch conditional
+      Ev_Bi   // branch indirect (to unknown destination)
+   }
+   EventTag;
+
+/* One outstanding event.  'tag' selects which member of the union 'Ev'
+   is meaningful; 'inode' is the InstrInfo of the instruction the event
+   belongs to. */
+typedef
+   struct {
+      EventTag   tag;
+      InstrInfo* inode;
+      union {
+         /* Instruction read: carries no data beyond 'inode'. */
+         struct {
+         } Ir;
+         /* Data read: effective address and access size in bytes. */
+         struct {
+            IRAtom* ea;
+            Int     szB;
+         } Dr;
+         /* Data write: effective address and access size in bytes. */
+         struct {
+            IRAtom* ea;
+            Int     szB;
+         } Dw;
+         /* Data modify.  Declared with the same members as Dr because
+            addEvent_Dw converts a Dr into a Dm by changing only the tag. */
+         struct {
+            IRAtom* ea;
+            Int     szB;
+         } Dm;
+         /* Conditional branch. */
+         struct {
+            IRAtom* taken; /* host-word typed (Ity_I32 or Ity_I64), as
+                              asserted by addEvent_Bc */
+         } Bc;
+         /* Indirect branch: the destination expression. */
+         struct {
+            IRAtom* dst;
+         } Bi;
+      } Ev;
+   }
+   Event;
+
+/* Clear every byte of *ev so that a freshly claimed slot starts out
+   zeroed before its fields are filled in. */
+static void init_Event ( Event* ev )
+{
+   VG_(memset)(ev, 0, sizeof(*ev));
+}
+
+/* Return the effective-address atom of a data event (Dr, Dw or Dm).
+   Any other kind of event trips an assertion. */
+static IRAtom* get_Event_dea ( Event* ev ) {
+   IRAtom* addr = NULL;
+   if      (ev->tag == Ev_Dr) addr = ev->Ev.Dr.ea;
+   else if (ev->tag == Ev_Dw) addr = ev->Ev.Dw.ea;
+   else if (ev->tag == Ev_Dm) addr = ev->Ev.Dm.ea;
+   else                       tl_assert(0);
+   return addr;
+}
+
+/* Return the access size in bytes of a data event (Dr, Dw or Dm).
+   Any other kind of event trips an assertion. */
+static Int get_Event_dszB ( Event* ev ) {
+   Int nBytes = 0;
+   if      (ev->tag == Ev_Dr) nBytes = ev->Ev.Dr.szB;
+   else if (ev->tag == Ev_Dw) nBytes = ev->Ev.Dw.szB;
+   else if (ev->tag == Ev_Dm) nBytes = ev->Ev.Dm.szB;
+   else                       tl_assert(0);
+   return nBytes;
+}
+
+
+/* Up to this many unnotified events are allowed.  Number is
+   arbitrary.  Larger numbers allow more event merging to occur, but
+   potentially induce more spilling due to extending live ranges of
+   address temporaries.  When the list already holds this many events,
+   the addEvent_* functions call flushEvents before adding another. */
+#define N_EVENTS 16
+
+
+/* A struct which holds all the running state during instrumentation.
+   Mostly to avoid passing loads of parameters everywhere.  One
+   instance lives on the stack of cg_instrument for the duration of a
+   single superblock. */
+typedef
+   struct {
+      /* The current outstanding-memory-event list.  Entries
+         [0 .. events_used-1] are valid; flushEvents resets events_used
+         to zero. */
+      Event events[N_EVENTS];
+      Int   events_used;
+
+      /* The array of InstrInfo bins for the BB. */
+      SB_info* sbInfo;
+
+      /* Number InstrInfo bins 'used' so far; setup_InstrInfo hands out
+         sbInfo->instrs[sbInfo_i] and then increments this. */
+      Int sbInfo_i;
+
+      /* The output SB being constructed. */
+      IRSB* sbOut;
+   }
+   CgState;
+
+
+/*------------------------------------------------------------*/
+/*--- Instrumentation main                                 ---*/
+/*------------------------------------------------------------*/
+
+// Allocate, register and return a fresh SB_info for the superblock sbIn.
+// origAddr must be the genuine original address of the block; it need not
+// equal the address of the block's first instruction, since redirection
+// can make the two differ.
+static
+SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
+{
+   Int      k;
+   Int      n_imarks = 0;
+   SB_info* info;
+
+   // Every original instruction contributes exactly one IMark, so the
+   // IMark count is the number of InstrInfo slots needed.
+   for (k = 0; k < sbIn->stmts_used; k++) {
+      if (sbIn->stmts[k]->tag == Ist_IMark)
+         n_imarks++;
+   }
+
+   // The table must not already hold an entry for this address.  If it
+   // does, translations were discarded without Cachegrind discarding the
+   // matching instr-info entries.
+   info = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
+   tl_assert(info == NULL);
+
+   // First translation at this address (the code may of course have been
+   // unloaded and reloaded somewhere else in memory).  The node is sized
+   // to hold the header plus one InstrInfo per instruction.
+   info = VG_(OSetGen_AllocNode)(instrInfoTable,
+                                 sizeof(SB_info)
+                                 + n_imarks*sizeof(InstrInfo));
+   info->SB_addr  = origAddr;
+   info->n_instrs = n_imarks;
+   VG_(OSetGen_Insert)( instrInfoTable, info );
+   distinct_instrs++;
+
+   return info;
+}
+
+
+/* Debug aid: print a one-line description of *ev.  Every kind except Ir
+   prints a prefix followed by the event's expression and a newline. */
+static void showEvent ( Event* ev )
+{
+   IRExpr* operand = NULL;   /* expression to print after the prefix */
+
+   switch (ev->tag) {
+      case Ev_Ir:
+         /* An instruction read has no operand; the line is complete. */
+         VG_(printf)("Ir %p\n", ev->inode);
+         return;
+      case Ev_Dr:
+         VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
+         operand = ev->Ev.Dr.ea;
+         break;
+      case Ev_Dw:
+         VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
+         operand = ev->Ev.Dw.ea;
+         break;
+      case Ev_Dm:
+         VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
+         operand = ev->Ev.Dm.ea;
+         break;
+      case Ev_Bc:
+         VG_(printf)("Bc %p   GA=", ev->inode);
+         operand = ev->Ev.Bc.taken;
+         break;
+      case Ev_Bi:
+         VG_(printf)("Bi %p  DST=", ev->inode);
+         operand = ev->Ev.Bi.dst;
+         break;
+      default:
+         tl_assert(0);
+         return;
+   }
+
+   ppIRExpr(operand);
+   VG_(printf)("\n");
+}
+
+// Claim the next unused InstrInfo in cgs->sbInfo for an instruction that
+// is being seen for the first time, fill in its address, length and
+// owning line cost centre, and return it.
+static
+InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
+{
+   Int        slot = cgs->sbInfo_i;
+   InstrInfo* node;
+
+   // The slot index must lie inside the array sized by get_SB_info.
+   tl_assert(slot >= 0);
+   tl_assert(slot < cgs->sbInfo->n_instrs);
+
+   node = &cgs->sbInfo->instrs[slot];
+   node->instr_addr = instr_addr;
+   node->instr_len  = instr_len;
+   node->parent     = get_lineCC(instr_addr);
+
+   cgs->sbInfo_i = slot + 1;
+   return node;
+}
+
+
+/* Generate code for all outstanding memory events, and mark the queue
+   empty.  Code is generated into cgs->sbOut as a sequence of dirty
+   helper calls.  The InstrInfo nodes the events refer to are not
+   touched here; only their addresses are passed to the helpers as
+   host-word constants.
+
+   Events are processed in list order.  An Ir event may be combined
+   with the event(s) directly after it so that a single helper call
+   covers them; all other event kinds are emitted one per call. */
+
+static void flushEvents ( CgState* cgs )
+{
+   Int        i, regparms;
+   Char*      helperName;
+   void*      helperAddr;
+   IRExpr**   argv;
+   IRExpr*    i_node_expr;
+   IRDirty*   di;
+   Event*     ev;
+   Event*     ev2;
+   Event*     ev3;
+
+   i = 0;
+   while (i < cgs->events_used) {
+
+      /* Reset per-iteration outputs so the assertions after the switch
+         catch any case that forgot to set them. */
+      helperName = NULL;
+      helperAddr = NULL;
+      argv       = NULL;
+      regparms   = 0;
+
+      /* generate IR to notify event i and possibly the ones
+         immediately following it. */
+      tl_assert(i >= 0 && i < cgs->events_used);
+
+      /* ev is the current event; ev2/ev3 are the next two events, or
+         NULL when the list ends before them. */
+      ev  = &cgs->events[i];
+      ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
+      ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
+      
+      if (DEBUG_CG) {
+         VG_(printf)("   flush "); 
+         showEvent( ev );
+      }
+
+      /* The InstrInfo pointer is passed to the helper as a constant. */
+      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
+
+      /* Decide on helper fn to call and args to pass it, and advance
+         i appropriately. */
+      switch (ev->tag) {
+         case Ev_Ir:
+            /* Merge an Ir with a following Dr/Dm. */
+            if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
+               /* Why is this true?  It's because we're merging an Ir
+                  with a following Dr or Dm.  The Ir derives from the
+                  instruction's IMark and the Dr/Dm from data
+                  references which follow it.  In short it holds
+                  because each insn starts with an IMark, hence an
+                  Ev_Ir, and so these Dr/Dm must pertain to the
+                  immediately preceding Ir.  Same applies to analogous
+                  assertions in the subsequent cases. */
+               tl_assert(ev2->inode == ev->inode);
+               helperName = "log_1I_1Dr_cache_access";
+               helperAddr = &log_1I_1Dr_cache_access;
+               argv = mkIRExprVec_3( i_node_expr,
+                                     get_Event_dea(ev2),
+                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
+               regparms = 3;
+               i += 2;
+            }
+            /* Merge an Ir with a following Dw. */
+            else
+            if (ev2 && ev2->tag == Ev_Dw) {
+               tl_assert(ev2->inode == ev->inode);
+               helperName = "log_1I_1Dw_cache_access";
+               helperAddr = &log_1I_1Dw_cache_access;
+               argv = mkIRExprVec_3( i_node_expr,
+                                     get_Event_dea(ev2),
+                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
+               regparms = 3;
+               i += 2;
+            }
+            /* Merge an Ir with two following Irs.  This must be tested
+               before the two-Ir case below, which it subsumes. */
+            else
+            if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir)
+            {
+               helperName = "log_3I_0D_cache_access";
+               helperAddr = &log_3I_0D_cache_access;
+               argv = mkIRExprVec_3( i_node_expr, 
+                                     mkIRExpr_HWord( (HWord)ev2->inode ), 
+                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
+               regparms = 3;
+               i += 3;
+            }
+            /* Merge an Ir with one following Ir. */
+            else
+            if (ev2 && ev2->tag == Ev_Ir) {
+               helperName = "log_2I_0D_cache_access";
+               helperAddr = &log_2I_0D_cache_access;
+               argv = mkIRExprVec_2( i_node_expr,
+                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
+               regparms = 2;
+               i += 2;
+            }
+            /* No merging possible; emit as-is. */
+            else {
+               helperName = "log_1I_0D_cache_access";
+               helperAddr = &log_1I_0D_cache_access;
+               argv = mkIRExprVec_1( i_node_expr );
+               regparms = 1;
+               i++;
+            }
+            break;
+         case Ev_Dr:
+         case Ev_Dm:
+            /* Data read or modify.  Both use the read helper, so a
+               modify is reported to the simulation as a read. */
+            helperName = "log_0I_1Dr_cache_access";
+            helperAddr = &log_0I_1Dr_cache_access;
+            argv = mkIRExprVec_3( i_node_expr, 
+                                  get_Event_dea(ev), 
+                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
+            regparms = 3;
+            i++;
+            break;
+         case Ev_Dw:
+            /* Data write */
+            helperName = "log_0I_1Dw_cache_access";
+            helperAddr = &log_0I_1Dw_cache_access;
+            argv = mkIRExprVec_3( i_node_expr,
+                                  get_Event_dea(ev), 
+                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
+            regparms = 3;
+            i++;
+            break;
+         case Ev_Bc:
+            /* Conditional branch */
+            helperName = "log_cond_branch";
+            helperAddr = &log_cond_branch;
+            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
+            regparms = 2;
+            i++;
+            break;
+         case Ev_Bi:
+            /* Branch to an unknown destination */
+            helperName = "log_ind_branch";
+            helperAddr = &log_ind_branch;
+            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
+            regparms = 2;
+            i++;
+            break;
+         default:
+            tl_assert(0);
+      }
+
+      /* Add the helper.  In every case above regparms equals the
+         number of arguments placed in argv. */
+      tl_assert(helperName);
+      tl_assert(helperAddr);
+      tl_assert(argv);
+      di = unsafeIRDirty_0_N( regparms, 
+                              helperName, VG_(fnptr_to_fnentry)( helperAddr ), 
+                              argv );
+      addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
+   }
+
+   /* Everything has been emitted; the list is now empty. */
+   cgs->events_used = 0;
+}
+
+/* Append an instruction-read event for 'inode' to the outstanding list,
+   flushing the list first if it is already full.  Unlike the data and
+   branch variants this is not conditional on any simulation option. */
+static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
+{
+   Event* slot;
+
+   /* Make room if every slot is taken. */
+   if (cgs->events_used == N_EVENTS)
+      flushEvents(cgs);
+   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+
+   /* Claim the next slot and fill it in. */
+   slot = &cgs->events[cgs->events_used++];
+   init_Event(slot);
+   slot->inode = inode;
+   slot->tag   = Ev_Ir;
+}
+
+/* Append a data-read event of 'datasize' bytes at address 'ea' for the
+   instruction 'inode'.  The arguments are always sanity-checked, but the
+   event is only recorded when cache simulation is enabled. */
+static
+void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
+{
+   Event* slot;
+
+   tl_assert(isIRAtom(ea));
+   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+
+   if (clo_cache_sim) {
+      /* Make room if every slot is taken. */
+      if (cgs->events_used == N_EVENTS)
+         flushEvents(cgs);
+      tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+
+      /* Claim the next slot and fill it in. */
+      slot = &cgs->events[cgs->events_used++];
+      init_Event(slot);
+      slot->inode     = inode;
+      slot->tag       = Ev_Dr;
+      slot->Ev.Dr.ea  = ea;
+      slot->Ev.Dr.szB = datasize;
+   }
+}
+
+/* Record a data write of 'datasize' bytes at address 'ea' for the
+   instruction 'inode'.
+
+   If the most recent outstanding event is a data read by the same
+   instruction, of the same size and at the same address, no new event
+   is added: that read is re-tagged as a data modify (Ev_Dm) instead.
+   Otherwise a new Ev_Dw event is appended, flushing the list first if
+   it is full.
+
+   The arguments are always sanity-checked, but nothing is recorded
+   unless cache simulation is enabled. */
+static
+void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
+{
+   Event* evt;
+
+   tl_assert(isIRAtom(ea));
+   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+
+   if (!clo_cache_sim)
+      return;
+
+   /* Is it possible to merge this write with the preceding read?  The
+      pointer to the last event is only formed once we know the list is
+      non-empty, so events[-1] is never computed. */
+   if (cgs->events_used > 0) {
+      Event* lastEvt = &cgs->events[cgs->events_used-1];
+      if (lastEvt->tag       == Ev_Dr
+          && lastEvt->Ev.Dr.szB == datasize
+          && lastEvt->inode     == inode
+          && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
+      {
+         /* Only the tag changes: the Dr and Dm union members are
+            declared with identical fields, so ea/szB carry over. */
+         lastEvt->tag   = Ev_Dm;
+         return;
+      }
+   }
+
+   /* No.  Add as normal. */
+   if (cgs->events_used == N_EVENTS)
+      flushEvents(cgs);
+   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+   evt = &cgs->events[cgs->events_used];
+   init_Event(evt);
+   evt->tag       = Ev_Dw;
+   evt->inode     = inode;
+   evt->Ev.Dw.szB = datasize;
+   evt->Ev.Dw.ea  = ea;
+   cgs->events_used++;
+}
+
+/* Append a conditional-branch event for 'inode'.  'guard' must be an
+   atom of host-word type (Ity_I32 when HWord is 4 bytes, otherwise
+   Ity_I64).  The arguments are always checked, but the event is only
+   recorded when branch simulation is enabled. */
+static
+void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
+{
+   Event* slot;
+
+   tl_assert(isIRAtom(guard));
+   tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard) 
+             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
+
+   if (clo_branch_sim) {
+      /* Make room if every slot is taken. */
+      if (cgs->events_used == N_EVENTS)
+         flushEvents(cgs);
+      tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+
+      /* Claim the next slot and fill it in. */
+      slot = &cgs->events[cgs->events_used++];
+      init_Event(slot);
+      slot->inode       = inode;
+      slot->tag         = Ev_Bc;
+      slot->Ev.Bc.taken = guard;
+   }
+}
+
+/* Append an indirect-branch event for 'inode'.  'whereTo' is the branch
+   destination and must be an atom of host-word type (Ity_I32 when HWord
+   is 4 bytes, otherwise Ity_I64).  The arguments are always checked, but
+   the event is only recorded when branch simulation is enabled. */
+static
+void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
+{
+   Event* slot;
+
+   tl_assert(isIRAtom(whereTo));
+   tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo) 
+             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
+
+   if (clo_branch_sim) {
+      /* Make room if every slot is taken. */
+      if (cgs->events_used == N_EVENTS)
+         flushEvents(cgs);
+      tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
+
+      /* Claim the next slot and fill it in. */
+      slot = &cgs->events[cgs->events_used++];
+      init_Event(slot);
+      slot->inode     = inode;
+      slot->tag       = Ev_Bi;
+      slot->Ev.Bi.dst = whereTo;
+   }
+}
+
+////////////////////////////////////////////////////////////
+
+
+/* Instrumentation entry point: build and return an instrumented copy of
+   the superblock sbIn.
+
+   closure  - closure->readdr is used as the key for this block's SB_info
+              and is asserted to equal vge->base[0].
+   sbIn     - the incoming superblock; every statement must be flat.
+   layout   - not used by this function.
+   vge      - guest extents; only base[0] is examined.
+   gWordTy,
+   hWordTy  - guest and host word types; they must be equal, otherwise
+              the tool panics.
+
+   The statements of sbIn are copied to the output in order.  Along the
+   way an event is queued for each IMark (instruction read), load,
+   store, memory-accessing dirty helper and side exit; the queue is
+   turned into helper calls by flushEvents before every side exit and
+   at the end of the block. */
+static
+IRSB* cg_instrument ( VgCallbackClosure* closure,
+                      IRSB* sbIn, 
+                      VexGuestLayout* layout, 
+                      VexGuestExtents* vge,
+                      IRType gWordTy, IRType hWordTy )
+{
+   Int        i, isize;
+   IRStmt*    st;
+   Addr64     cia; /* address of current insn */
+   CgState    cgs;
+   IRTypeEnv* tyenv = sbIn->tyenv;
+   InstrInfo* curr_inode = NULL;
+
+   if (gWordTy != hWordTy) {
+      /* We don't currently support this case. */
+      VG_(tool_panic)("host/guest word size mismatch");
+   }
+
+   // Set up new SB
+   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
+
+   // Copy verbatim any IR preamble preceding the first IMark
+   i = 0;
+   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
+      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
+      i++;
+   }
+
+   // Get the first statement, and initial cia from it
+   tl_assert(sbIn->stmts_used > 0);
+   tl_assert(i < sbIn->stmts_used);
+   st = sbIn->stmts[i];
+   tl_assert(Ist_IMark == st->tag);
+
+   // The main loop below starts at this same IMark and sets cia/isize
+   // again; setting them here gives both a defined value up front.
+   cia   = st->Ist.IMark.addr;
+   isize = st->Ist.IMark.len;
+   // If Vex fails to decode an instruction, the size will be zero.
+   // Pretend otherwise.
+   if (isize == 0) isize = VG_MIN_INSTR_SZB;
+
+   // Set up running state and get block info
+   tl_assert(closure->readdr == vge->base[0]);
+   cgs.events_used = 0;
+   cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
+   cgs.sbInfo_i    = 0;
+
+   if (DEBUG_CG)
+      VG_(printf)("\n\n---------- cg_instrument ----------\n");
+
+   // Traverse the block, initialising inodes, adding events and flushing as
+   // necessary.
+   for (/*use current i*/; i < sbIn->stmts_used; i++) {
+
+      st = sbIn->stmts[i];
+      tl_assert(isFlatIRStmt(st));
+
+      switch (st->tag) {
+         case Ist_NoOp:
+         case Ist_AbiHint:
+         case Ist_Put:
+         case Ist_PutI:
+         case Ist_MBE:
+            // These generate no events.
+            break;
+
+         case Ist_IMark:
+            cia   = st->Ist.IMark.addr;
+            isize = st->Ist.IMark.len;
+
+            // If Vex fails to decode an instruction, the size will be zero.
+            // Pretend otherwise.
+            if (isize == 0) isize = VG_MIN_INSTR_SZB;
+
+            // Sanity-check size.
+            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
+                     || VG_CLREQ_SZB == isize );
+
+            // Get space for and init the inode, record it as the current one.
+            // Subsequent Dr/Dw/Dm events from the same instruction will
+            // also use it.
+            curr_inode = setup_InstrInfo(&cgs, cia, isize);
+
+            addEvent_Ir( &cgs, curr_inode );
+            break;
+
+         case Ist_WrTmp: {
+            IRExpr* data = st->Ist.WrTmp.data;
+            if (data->tag == Iex_Load) {
+               IRExpr* aexpr = data->Iex.Load.addr;
+               // Note also, endianness info is ignored.  I guess
+               // that's not interesting.
+               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
+                                  aexpr );
+            }
+            break;
+         }
+
+         case Ist_Store: {
+            IRExpr* data  = st->Ist.Store.data;
+            IRExpr* aexpr = st->Ist.Store.addr;
+            addEvent_Dw( &cgs, curr_inode,
+                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
+            break;
+         }
+
+         case Ist_Dirty: {
+            Int      dataSize;
+            IRDirty* d = st->Ist.Dirty.details;
+            if (d->mFx != Ifx_None) {
+               /* This dirty helper accesses memory.  Collect the details. */
+               tl_assert(d->mAddr != NULL);
+               tl_assert(d->mSize != 0);
+               dataSize = d->mSize;
+               // Large (eg. 28B, 108B, 512B on x86) data-sized
+               // instructions will be done inaccurately, but they're
+               // very rare and this avoids errors from hitting more
+               // than two cache lines in the simulation.
+               if (dataSize > MIN_LINE_SIZE)
+                  dataSize = MIN_LINE_SIZE;
+               // A modify posts a read followed by a write at the same
+               // address, which addEvent_Dw can merge into one Dm.
+               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
+                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
+               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
+                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
+            } else {
+               tl_assert(d->mAddr == NULL);
+               tl_assert(d->mSize == 0);
+            }
+            break;
+         }
+
+         case Ist_Exit: {
+            /* Only build the widened guard when branch simulation is
+               enabled.  Otherwise addEvent_Bc would discard the event
+               and the extra temporaries and statements would be dead
+               code in every block. */
+            if (clo_branch_sim) {
+               /* Stuff to widen the guard expression to a host word, so
+                  we can pass it to the branch predictor simulation
+                  functions easily. */
+               Bool     inverted;
+               Addr64   nia, sea;
+               IRConst* dst;
+               IRType   tyW    = hWordTy;
+               IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
+               IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
+               IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
+               IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
+               IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
+               IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
+                                              : IRExpr_Const(IRConst_U64(1));
+
+               /* First we need to figure out whether the side exit got
+                  inverted by the ir optimiser.  To do that, figure out
+                  the next (fallthrough) instruction's address and the
+                  side exit address and see if they are the same. */
+               nia = cia + (Addr64)isize;
+               if (tyW == Ity_I32)
+                  nia &= 0xFFFFFFFFULL;
+
+               /* Side exit address */
+               dst = st->Ist.Exit.dst;
+               if (tyW == Ity_I32) {
+                  tl_assert(dst->tag == Ico_U32);
+                  sea = (Addr64)(UInt)dst->Ico.U32;
+               } else {
+                  tl_assert(tyW == Ity_I64);
+                  tl_assert(dst->tag == Ico_U64);
+                  sea = dst->Ico.U64;
+               }
+
+               inverted = nia == sea;
+
+               /* Widen the guard expression. */
+               addStmtToIRSB( cgs.sbOut,
+                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
+               addStmtToIRSB( cgs.sbOut,
+                              IRStmt_WrTmp( guardW,
+                                            IRExpr_Unop(widen,
+                                                        IRExpr_RdTmp(guard1))) );
+               /* If the exit is inverted, invert the sense of the guard. */
+               addStmtToIRSB(
+                  cgs.sbOut,
+                  IRStmt_WrTmp(
+                     guard,
+                     inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
+                              : IRExpr_RdTmp(guardW)
+                  ));
+               /* And post the event. */
+               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
+            }
+
+            /* We may never reach the next statement, so need to flush
+               all outstanding transactions now. */
+            flushEvents( &cgs );
+            break;
+         }
+
+         default:
+            tl_assert(0);
+            break;
+      }
+
+      /* Copy the original statement */
+      addStmtToIRSB( cgs.sbOut, st );
+
+      if (DEBUG_CG) {
+         ppIRStmt(st);
+         VG_(printf)("\n");
+      }
+   }
+
+   /* Deal with branches to unknown destinations.  Except ignore ones
+      which are function returns as we assume the return stack
+      predictor never mispredicts. */
+   if (sbIn->jumpkind == Ijk_Boring) {
+      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
+      switch (sbIn->next->tag) {
+         case Iex_Const:
+            break; /* boring - branch to known address */
+         case Iex_RdTmp:
+            /* looks like an indirect branch (branch to unknown) */
+            addEvent_Bi( &cgs, curr_inode, sbIn->next );
+            break;
+         default:
+            /* shouldn't happen - if the incoming IR is properly
+               flattened, should only have tmp and const cases to
+               consider. */
+            tl_assert(0);
+      }
+   }
+
+   /* At the end of the bb.  Flush outstandings. */
+   flushEvents( &cgs );
+
+   /* done.  stay sane ...  Every InstrInfo slot allocated by get_SB_info
+      must have been handed out by setup_InstrInfo. */
+   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
+
+   if (DEBUG_CG) {
+      VG_(printf)( "goto {");
+      ppIRJumpKind(sbIn->jumpkind);
+      VG_(printf)( "} ");
+      ppIRExpr( sbIn->next );
+      VG_(printf)( "}\n");
+   }
+
+   return cgs.sbOut;
+}
+
+/*------------------------------------------------------------*/
+/*--- Cache configuration                                  ---*/
+/*------------------------------------------------------------*/
+
+/* Initialiser with all three cache_t fields set to -1.  configure_caches
+   treats a cache whose fields are all -1 as "not given on the command
+   line". */
+#define UNDEFINED_CACHE     { -1, -1, -1 }
+
+/* Cache configurations requested via command-line options, if any. */
+static cache_t clo_I1_cache = UNDEFINED_CACHE;
+static cache_t clo_D1_cache = UNDEFINED_CACHE;
+static cache_t clo_L2_cache = UNDEFINED_CACHE;
+
+/* Validate a cache configuration.  'name' identifies the cache in
+   messages (the callers pass "I1", "D1" or "L2").
+
+   Nothing is repaired: on the first violated requirement an error is
+   printed with VG_UMSG and the process exits via VG_(exit)(1).  If the
+   function returns, the configuration passed every check:
+     - size, associativity and line size are all positive;
+     - the set count (size / line_size / assoc) is an exact power of two;
+     - the line size is a power of two and at least MIN_LINE_SIZE;
+     - the cache is larger than one line;
+     - the associativity does not exceed the number of lines. */
+static
+void check_cache(cache_t* cache, Char *name)
+{
+   /* Reject zero or negative parameters first.  A zero line size or
+      associativity would otherwise be used as a divisor below. */
+   if (cache->size <= 0 || cache->assoc <= 0 || cache->line_size <= 0) {
+      VG_UMSG("error: %s size, associativity and line size must all be "
+              "positive; aborting.", name);
+      VG_(exit)(1);
+   }
+
+   /* Simulator requires line size and set count to be powers of two */
+   if (( cache->size % (cache->line_size * cache->assoc) != 0) ||
+       (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc))) {
+      VG_UMSG("error: %s set count not a power of two; aborting.", name);
+      VG_(exit)(1);
+   }
+
+   if (-1 == VG_(log2)(cache->line_size)) {
+      VG_UMSG("error: %s line size of %dB not a power of two; aborting.",
+              name, cache->line_size);
+      VG_(exit)(1);
+   }
+
+   // Then check line size >= 16 -- any smaller and a single instruction could
+   // straddle three cache lines, which breaks a simulation assertion and is
+   // stupid anyway.
+   if (cache->line_size < MIN_LINE_SIZE) {
+      VG_UMSG("error: %s line size of %dB too small; aborting.",
+              name, cache->line_size);
+      VG_(exit)(1);
+   }
+
+   /* Then check cache size > line size (causes seg faults if not). */
+   if (cache->size <= cache->line_size) {
+      VG_UMSG("error: %s cache size of %dB <= line size of %dB; aborting.",
+              name, cache->size, cache->line_size);
+      VG_(exit)(1);
+   }
+
+   /* Then check assoc <= (size / line size) (seg faults otherwise).
+      This is fatal like the checks above, so it is reported as an
+      error rather than a warning. */
+   if (cache->assoc > (cache->size / cache->line_size)) {
+      VG_UMSG("error: %s associativity > (size / line size); aborting.",
+              name);
+      VG_(exit)(1);
+   }
+}
+
+/* Decide the I1, D1 and L2 cache configurations and store them in
+   *I1c, *D1c and *L2c.
+
+   VG_(configure_caches) supplies the starting values; any cache given
+   on the command line (clo_I1_cache, clo_D1_cache, clo_L2_cache) then
+   overrides the corresponding one.  Each result is validated with
+   check_cache, which exits the process on an invalid configuration.
+   At verbosity 2 or higher the chosen configuration is printed. */
+static
+void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+/* True if any field of cache L differs from the -1 "undefined" marker. */
+#define DEFINED(L)   (-1 != (L).size  || -1 != (L).assoc || -1 != (L).line_size)
+
+   Int n_clos = 0;
+
+   // Count how many were defined on the command line.
+   if (DEFINED(clo_I1_cache)) { n_clos++; }
+   if (DEFINED(clo_D1_cache)) { n_clos++; }
+   if (DEFINED(clo_L2_cache)) { n_clos++; }
+
+   // Set the cache config (using auto-detection, if supported by the
+   // architecture).  The last argument is true when all three caches
+   // were given on the command line.
+   VG_(configure_caches)( I1c, D1c, L2c, (3 == n_clos) );
+
+   // Then replace with any defined on the command line.
+   if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
+   if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
+   if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }
+
+   // Then check values; check_cache aborts if any is not acceptable.
+   check_cache(I1c, "I1");
+   check_cache(D1c, "D1");
+   check_cache(L2c, "L2");
+
+   if (VG_(clo_verbosity) >= 2) {
+      VG_UMSG("Cache configuration used:");
+      VG_UMSG("  I1: %dB, %d-way, %dB lines",
+              I1c->size, I1c->assoc, I1c->line_size);
+      VG_UMSG("  D1: %dB, %d-way, %dB lines",
+              D1c->size, D1c->assoc, D1c->line_size);
+      VG_UMSG("  L2: %dB, %d-way, %dB lines",
+              L2c->size, L2c->assoc, L2c->line_size);
+   }
+/* Undefine the helper macro actually defined above so that it does not
+   leak into the rest of the file. */
+#undef DEFINED
+}
+
+/*------------------------------------------------------------*/
+/*--- cg_fini() and related function                       ---*/
+/*------------------------------------------------------------*/
+
+// Total reads/writes/misses.  Calculated during CC traversal at the end.
+// All auto-zeroed.  fprint_CC_table_and_calc_totals adds every LineCC's
+// counters into these while it writes the output file.
+static CacheCC  Ir_total;
+static CacheCC  Dr_total;
+static CacheCC  Dw_total;
+static BranchCC Bc_total;
+static BranchCC Bi_total;
+
+static void fprint_CC_table_and_calc_totals(void)
+{
+   Int     i, fd;
+   SysRes  sres;
+   Char    buf[512], *currFile = NULL, *currFn = NULL;
+   LineCC* lineCC;
+
+   // Setup output filename.  Nb: it's important to do this now, ie. as late
+   // as possible.  If we do it at start-up and the program forks and the
+   // output file format string contains a %p (pid) specifier, both the
+   // parent and child will incorrectly write to the same file;  this
+   // happened in 3.3.0.
+   Char* cachegrind_out_file =
+      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
+
+   sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
+                                         VKI_S_IRUSR|VKI_S_IWUSR);
+   if (sres.isError) {
+      // If the file can't be opened for whatever reason (conflict
+      // between multiple cachegrinded processes?), give up now.
+      VG_UMSG("error: can't open cache simulation output file '%s'",
+              cachegrind_out_file );
+      VG_UMSG("       ... so simulation results will be missing.");
+      VG_(free)(cachegrind_out_file);
+      return;
+   } else {
+      fd = sres.res;
+      VG_(free)(cachegrind_out_file);
+   }
+
+   // "desc:" lines (giving I1/D1/L2 cache configuration).  The spaces after
+   // the 2nd colon makes cg_annotate's output look nicer.
+   VG_(sprintf)(buf, "desc: I1 cache:         %s\n"
+                     "desc: D1 cache:         %s\n"
+                     "desc: L2 cache:         %s\n",
+                     I1.desc_line, D1.desc_line, L2.desc_line);
+   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+   // "cmd:" line
+   VG_(strcpy)(buf, "cmd:");
+   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+   if (VG_(args_the_exename)) {
+      VG_(write)(fd, " ", 1);
+      VG_(write)(fd, VG_(args_the_exename), 
+                     VG_(strlen)( VG_(args_the_exename) ));
+   }
+   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
+      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
+      if (arg) {
+         VG_(write)(fd, " ", 1);
+         VG_(write)(fd, arg, VG_(strlen)( arg ));
+      }
+   }
+   // "events:" line
+   if (clo_cache_sim && clo_branch_sim) {
+      VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
+                                  "Bc Bcm Bi Bim\n");
+   }
+   else if (clo_cache_sim && !clo_branch_sim) {
+      VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
+                                  "\n");
+   }
+   else if (!clo_cache_sim && clo_branch_sim) {
+      VG_(sprintf)(buf, "\nevents: Ir "
+                                  "Bc Bcm Bi Bim\n");
+   }
+   else
+      tl_assert(0); /* can't happen */
+
+   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+   // Traverse every lineCC
+   VG_(OSetGen_ResetIter)(CC_table);
+   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
+      Bool just_hit_a_new_file = False;
+      // If we've hit a new file, print a "fl=" line.  Note that because
+      // each string is stored exactly once in the string table, we can use
+      // pointer comparison rather than strcmp() to test for equality, which
+      // is good because most of the time the comparisons are equal and so
+      // the whole strings would have to be checked.
+      if ( lineCC->loc.file != currFile ) {
+         currFile = lineCC->loc.file;
+         VG_(sprintf)(buf, "fl=%s\n", currFile);
+         VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+         distinct_files++;
+         just_hit_a_new_file = True;
+      }
+      // If we've hit a new function, print a "fn=" line.  We know to do
+      // this when the function name changes, and also every time we hit a
+      // new file (in which case the new function name might be the same as
+      // in the old file, hence the just_hit_a_new_file test).
+      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
+         currFn = lineCC->loc.fn;
+         VG_(sprintf)(buf, "fn=%s\n", currFn);
+         VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+         distinct_fns++;
+      }
+
+      // Print the LineCC
+      if (clo_cache_sim && clo_branch_sim) {
+         VG_(sprintf)(buf, "%u %llu %llu %llu"
+                             " %llu %llu %llu"
+                             " %llu %llu %llu"
+                             " %llu %llu %llu %llu\n",
+                            lineCC->loc.line,
+                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2, 
+                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
+                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2,
+                            lineCC->Bc.b, lineCC->Bc.mp, 
+                            lineCC->Bi.b, lineCC->Bi.mp);
+      }
+      else if (clo_cache_sim && !clo_branch_sim) {
+         VG_(sprintf)(buf, "%u %llu %llu %llu"
+                             " %llu %llu %llu"
+                             " %llu %llu %llu\n",
+                            lineCC->loc.line,
+                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2, 
+                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
+                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2);
+      }
+      else if (!clo_cache_sim && clo_branch_sim) {
+         VG_(sprintf)(buf, "%u %llu"
+                             " %llu %llu %llu %llu\n",
+                            lineCC->loc.line,
+                            lineCC->Ir.a, 
+                            lineCC->Bc.b, lineCC->Bc.mp, 
+                            lineCC->Bi.b, lineCC->Bi.mp);
+      }
+      else
+         tl_assert(0);
+
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+      // Update summary stats
+      Ir_total.a  += lineCC->Ir.a;
+      Ir_total.m1 += lineCC->Ir.m1;
+      Ir_total.m2 += lineCC->Ir.m2;
+      Dr_total.a  += lineCC->Dr.a;
+      Dr_total.m1 += lineCC->Dr.m1;
+      Dr_total.m2 += lineCC->Dr.m2;
+      Dw_total.a  += lineCC->Dw.a;
+      Dw_total.m1 += lineCC->Dw.m1;
+      Dw_total.m2 += lineCC->Dw.m2;
+      Bc_total.b  += lineCC->Bc.b;
+      Bc_total.mp += lineCC->Bc.mp;
+      Bi_total.b  += lineCC->Bi.b;
+      Bi_total.mp += lineCC->Bi.mp;
+
+      distinct_lines++;
+   }
+
+   // Summary stats must come after rest of table, since we calculate them
+   // during traversal.
+   if (clo_cache_sim && clo_branch_sim) {
+      VG_(sprintf)(buf, "summary:"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu %llu\n", 
+                        Ir_total.a, Ir_total.m1, Ir_total.m2,
+                        Dr_total.a, Dr_total.m1, Dr_total.m2,
+                        Dw_total.a, Dw_total.m1, Dw_total.m2,
+                        Bc_total.b, Bc_total.mp, 
+                        Bi_total.b, Bi_total.mp);
+   }
+   else if (clo_cache_sim && !clo_branch_sim) {
+      VG_(sprintf)(buf, "summary:"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu"
+                        " %llu %llu %llu\n",
+                        Ir_total.a, Ir_total.m1, Ir_total.m2,
+                        Dr_total.a, Dr_total.m1, Dr_total.m2,
+                        Dw_total.a, Dw_total.m1, Dw_total.m2);
+   }
+   else if (!clo_cache_sim && clo_branch_sim) {
+      VG_(sprintf)(buf, "summary:"
+                        " %llu"
+                        " %llu %llu %llu %llu\n", 
+                        Ir_total.a,
+                        Bc_total.b, Bc_total.mp, 
+                        Bi_total.b, Bi_total.mp);
+   }
+   else
+      tl_assert(0);
+
+   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+   VG_(close)(fd);
+}
+
+static UInt ULong_width(ULong n)
+{
+   // Count decimal digits with a do-while so that 0 is one digit wide.
+   UInt digits = 0;
+   do {
+      digits++;
+      n /= 10;
+   } while (n != 0);
+   return digits + (digits - 1) / 3;   // one comma per complete group of 3
+}
+
+static void cg_fini(Int exitcode)
+{
+   /* Writes the cost-centre table (which also accumulates the *_total
+      values), then prints the cache and/or branch summary; extra
+      statistics are added when VG_(clo_verbosity) > 1. */
+   static Char buf1[128], buf2[128], buf3[128], buf4[123], fmt[128];
+
+   CacheCC  D_total;
+   BranchCC B_total;
+   ULong L2_total_m, L2_total_mr, L2_total_mw,
+         L2_total, L2_total_r, L2_total_w;
+   Int l1, l2, l3;
+
+   /* Running with both cache and branch simulation disabled is not
+      allowed (checked during command line option processing). */
+   tl_assert(clo_cache_sim || clo_branch_sim);
+
+   fprint_CC_table_and_calc_totals();
+
+   if (VG_(clo_verbosity) == 0) 
+      return;
+
+   #define MAX(a, b)  ((a) >= (b) ? (a) : (b))
+
+   /* I cache results.  Use the I_refs value to determine the first column
+    * width. */
+   l1 = ULong_width(Ir_total.a);
+   l2 = ULong_width(MAX(Dr_total.a, Bc_total.b));
+   l3 = ULong_width(MAX(Dw_total.a, Bi_total.b));
+
+   /* Make format string, getting width right for numbers */
+   VG_(sprintf)(fmt, "%%s %%,%dllu", l1);
+
+   /* Always print this */
+   VG_UMSG(fmt, "I   refs:     ", Ir_total.a);
+
+   /* If cache profiling is enabled, show D access numbers and all
+      miss numbers */
+   if (clo_cache_sim) {
+      VG_UMSG(fmt, "I1  misses:   ", Ir_total.m1);
+      VG_UMSG(fmt, "L2i misses:   ", Ir_total.m2);
+
+      if (0 == Ir_total.a) Ir_total.a = 1;
+      VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
+      VG_UMSG("I1  miss rate: %s", buf1);
+
+      VG_(percentify)(Ir_total.m2, Ir_total.a, 2, l1+1, buf1);
+      VG_UMSG("L2i miss rate: %s", buf1);
+      VG_UMSG("");
+
+      /* D cache results.  Use the D_refs.rd and D_refs.wr values to
+       * determine the width of columns 2 & 3. */
+      D_total.a  = Dr_total.a  + Dw_total.a;
+      D_total.m1 = Dr_total.m1 + Dw_total.m1;
+      D_total.m2 = Dr_total.m2 + Dw_total.m2;
+
+      /* Make format string, getting width right for numbers */
+      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)", l1, l2, l3);
+
+      VG_UMSG(fmt, "D   refs:     ", D_total.a, Dr_total.a, Dw_total.a);
+      VG_UMSG(fmt, "D1  misses:   ", D_total.m1, Dr_total.m1, Dw_total.m1);
+      VG_UMSG(fmt, "L2d misses:   ", D_total.m2, Dr_total.m2, Dw_total.m2);
+
+      if (0 == D_total.a)  D_total.a = 1;
+      if (0 == Dr_total.a) Dr_total.a = 1;
+      if (0 == Dw_total.a) Dw_total.a = 1;
+      VG_(percentify)( D_total.m1,  D_total.a, 1, l1+1, buf1);
+      VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
+      VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
+      VG_UMSG("D1  miss rate: %s (%s     + %s  )", buf1, buf2,buf3);
+
+      VG_(percentify)( D_total.m2,  D_total.a, 1, l1+1, buf1);
+      VG_(percentify)(Dr_total.m2, Dr_total.a, 1, l2+1, buf2);
+      VG_(percentify)(Dw_total.m2, Dw_total.a, 1, l3+1, buf3);
+      VG_UMSG("L2d miss rate: %s (%s     + %s  )", buf1, buf2,buf3);
+      VG_UMSG("");
+
+      /* L2 overall results */
+
+      L2_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
+      L2_total_r = Dr_total.m1 + Ir_total.m1;
+      L2_total_w = Dw_total.m1;
+      VG_UMSG(fmt, "L2 refs:      ",
+                   L2_total, L2_total_r, L2_total_w);
+
+      L2_total_m  = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
+      L2_total_mr = Dr_total.m2 + Ir_total.m2;
+      L2_total_mw = Dw_total.m2;
+      VG_UMSG(fmt, "L2 misses:    ",
+                   L2_total_m, L2_total_mr, L2_total_mw);
+
+      VG_(percentify)(L2_total_m,  (Ir_total.a + D_total.a),  1, l1+1, buf1);
+      VG_(percentify)(L2_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
+      VG_(percentify)(L2_total_mw, Dw_total.a,                1, l3+1, buf3);
+      VG_UMSG("L2 miss rate:  %s (%s     + %s  )", buf1, buf2,buf3);
+   }
+
+   /* If branch profiling is enabled, show branch overall results. */
+   if (clo_branch_sim) {
+      /* Make format string, getting width right for numbers */
+      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)", l1, l2, l3);
+
+      B_total.b  = Bc_total.b  + Bi_total.b;
+      B_total.mp = Bc_total.mp + Bi_total.mp;
+
+      VG_UMSG("");
+      VG_UMSG(fmt, "Branches:     ", B_total.b, Bc_total.b, Bi_total.b);
+      VG_UMSG(fmt, "Mispredicts:  ", B_total.mp, Bc_total.mp, Bi_total.mp);
+
+      /* Print the true counts above; only afterwards force zero denominators
+         to 1, as the D cache code does, so the rates are well defined. */
+      if (0 == B_total.b)   B_total.b  = 1;
+      if (0 == Bc_total.b)  Bc_total.b = 1;
+      if (0 == Bi_total.b)  Bi_total.b = 1;
+      VG_(percentify)(B_total.mp,  B_total.b,  1, l1+1, buf1);
+      VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
+      VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);
+
+      VG_UMSG("Mispred rate:  %s (%s     + %s   )", buf1, buf2,buf3);
+   }
+
+   // Various stats
+   if (VG_(clo_verbosity) > 1) {
+      Int debug_lookups = full_debugs      + fn_debugs +
+                          file_line_debugs + no_debugs;
+
+      VG_DMSG("");
+      VG_DMSG("cachegrind: distinct files: %d", distinct_files);
+      VG_DMSG("cachegrind: distinct fns:   %d", distinct_fns);
+      VG_DMSG("cachegrind: distinct lines: %d", distinct_lines);
+      VG_DMSG("cachegrind: distinct instrs:%d", distinct_instrs);
+      VG_DMSG("cachegrind: debug lookups      : %d", debug_lookups);
+      
+      VG_(percentify)(full_debugs,      debug_lookups, 1, 6, buf1);
+      VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
+      VG_(percentify)(fn_debugs,        debug_lookups, 1, 6, buf3);
+      VG_(percentify)(no_debugs,        debug_lookups, 1, 6, buf4);
+      VG_DMSG("cachegrind: with full      info:%s (%d)", 
+              buf1, full_debugs);
+      VG_DMSG("cachegrind: with file/line info:%s (%d)", 
+              buf2, file_line_debugs);
+      VG_DMSG("cachegrind: with fn name   info:%s (%d)", 
+              buf3, fn_debugs);
+      VG_DMSG("cachegrind: with zero      info:%s (%d)", 
+              buf4, no_debugs);
+
+      VG_DMSG("cachegrind: string table size: %lu",
+              VG_(OSetGen_Size)(stringTable));
+      VG_DMSG("cachegrind: CC table size: %lu",
+              VG_(OSetGen_Size)(CC_table));
+      VG_DMSG("cachegrind: InstrInfo table size: %lu",
+              VG_(OSetGen_Size)(instrInfoTable));
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Discarding BB info                                           ---*/
+/*--------------------------------------------------------------------*/
+
+// Called when a translation is removed from the translation cache for
+// any reason at all: to free up space, because the guest code was
+// unmapped or modified, or for any arbitrary reason.
+static
+void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
+{
+   SB_info* sbInfo;
+   Addr     orig_addr = (Addr)vge.base[0];   // NOTE(review): orig_addr64 is never read
+
+   tl_assert(vge.n_used > 0);
+
+   if (DEBUG_CG)
+      VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n", 
+                   (void*)(Addr)orig_addr,
+                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
+
+   // Remove the SB_info keyed by orig_addr from instrInfoTable and free it.
+   // NOTE(review): the key is vge.base[0], not orig_addr64 -- confirm intent.
+   sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
+   tl_assert(NULL != sbInfo);   // an entry is required to exist for every discard
+   VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Command line processing                                      ---*/
+/*--------------------------------------------------------------------*/
+
+static void parse_cache_opt ( cache_t* cache, Char* opt )
+{
+   Long i1, i2, i3;
+   Char* endptr;
+
+   // 'opt' looks like "65536,2,64" (size,assoc,line_size).  Extract them.
+   i1 = VG_(strtoll10)(opt,      &endptr); if (*endptr != ',')  goto bad;
+   i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',')  goto bad;
+   i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
+
+   // Check for overflow: each value must survive narrowing Long -> Int.
+   cache->size      = (Int)i1;
+   cache->assoc     = (Int)i2;
+   cache->line_size = (Int)i3;
+   if (cache->size      != i1) goto overflow;
+   if (cache->assoc     != i2) goto overflow;
+   if (cache->line_size != i3) goto overflow;
+   return;
+
+  overflow:
+   // No "\n" here: VG_UMSG ends the line itself (cf. VG_UMSG("") in cg_fini).
+   VG_UMSG("one of the cache parameters was too large and overflowed");
+  bad:
+   // XXX: this omits the "--I1/D1/L2=" part from the message, but that's
+   // not a big deal.
+   VG_(err_bad_option)(opt);
+}
+
+static Bool cg_process_cmd_line_option(Char* arg)
+{
+   Char* tmp_str;
+
+   // Cache geometry options; presumably VG_STR_CLO points tmp_str at the value.
+   if      VG_STR_CLO(arg, "--I1", tmp_str)
+      parse_cache_opt(&clo_I1_cache, tmp_str);
+   else if VG_STR_CLO(arg, "--D1", tmp_str)
+      parse_cache_opt(&clo_D1_cache, tmp_str);
+   else if VG_STR_CLO(arg, "--L2", tmp_str)
+      parse_cache_opt(&clo_L2_cache, tmp_str);
+
+   else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
+   else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
+   else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
+   else
+      return False;   // not one of this tool's options
+
+   return True;       // one of the branches above matched
+}
+
+static void cg_print_usage(void)
+{
+   VG_(printf)(   // one help line per option matched in cg_process_cmd_line_option
+"    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
+"    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
+"    --L2=<size>,<assoc>,<line_size>  set L2 cache manually\n"
+"    --cache-sim=yes|no  [yes]        collect cache stats?\n"
+"    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
+"    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
+   );
+}
+
+static void cg_print_debug_usage(void)
+{
+   VG_(printf)(   // no debugging options exist; only this placeholder is printed
+"    (none)\n"
+   );
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Setup                                                        ---*/
+/*--------------------------------------------------------------------*/
+
+static void cg_post_clo_init(void); /* just below */
+
+static void cg_pre_clo_init(void)
+{
+   VG_(details_name)            ("Cachegrind");
+   VG_(details_version)         (NULL);
+   VG_(details_description)     ("a cache and branch-prediction profiler");
+   VG_(details_copyright_author)(
+      "Copyright (C) 2002-2009, and GNU GPL'd, by Nicholas Nethercote et al.");
+   VG_(details_bug_reports_to)  (VG_BUGS_TO);
+   VG_(details_avg_translation_sizeB) ( 500 );
+   // Register the three basic callbacks: post-option init, instrumentation, finalisation.
+   VG_(basic_tool_funcs)          (cg_post_clo_init,
+                                   cg_instrument,
+                                   cg_fini);
+   // Optional hooks: superblock discard notification and option handling/help.
+   VG_(needs_superblock_discards)(cg_discard_superblock_info);
+   VG_(needs_command_line_options)(cg_process_cmd_line_option,
+                                   cg_print_usage,
+                                   cg_print_debug_usage);
+}
+
+static void cg_post_clo_init(void)
+{
+   cache_t I1c, D1c, L2c; 
+   // I1c/D1c/L2c are filled in by configure_caches() further down.
+   /* Can't disable both cache and branch profiling */
+   if ((!clo_cache_sim) && (!clo_branch_sim)) {
+      VG_UMSG("ERROR: --cache-sim=no --branch-sim=no is not allowed.");
+      VG_UMSG("You must select cache profiling, or branch profiling, or both.");
+      VG_(exit)(2);
+   }
+   // Create the three global tables; the string tags label each allocation site.
+   CC_table =
+      VG_(OSetGen_Create)(offsetof(LineCC, loc),
+                          cmp_CodeLoc_LineCC,
+                          VG_(malloc), "cg.main.cpci.1",
+                          VG_(free));
+   instrInfoTable =
+      VG_(OSetGen_Create)(/*keyOff*/0,
+                          NULL,
+                          VG_(malloc), "cg.main.cpci.2",
+                          VG_(free));
+   stringTable =
+      VG_(OSetGen_Create)(/*keyOff*/0,
+                          stringCmp,
+                          VG_(malloc), "cg.main.cpci.3",
+                          VG_(free));
+
+   configure_caches(&I1c, &D1c, &L2c);
+   // The simulators are initialised whether or not clo_cache_sim is set.
+   cachesim_I1_initcache(I1c);
+   cachesim_D1_initcache(D1c);
+   cachesim_L2_initcache(L2c);
+}
+
+VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/cachegrind/cg_merge.c b/cachegrind/cg_merge.c
new file mode 100644
index 0000000..1d8ad41
--- /dev/null
+++ b/cachegrind/cg_merge.c
@@ -0,0 +1,1571 @@
+
+/*--------------------------------------------------------------------*/
+/*--- A program that merges multiple cachegrind output files.      ---*/
+/*---                                                   cg_merge.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+  This file is part of Cachegrind, a Valgrind tool for cache
+  profiling programs.
+
+  Copyright (C) 2002-2009 Nicholas Nethercote
+     njn@valgrind.org
+
+  AVL tree code derived from
+  ANSI C Library for maintainance of AVL Balanced Trees
+  (C) 2000 Daniel Nagy, Budapest University of Technology and Economics
+  Released under GNU General Public License (GPL) version 2
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+
+typedef  signed long   Word;
+typedef  unsigned long UWord;
+typedef  unsigned char Bool;
+#define True ((Bool)1)
+#define False ((Bool)0)
+typedef  signed int    Int;
+typedef  unsigned int  UInt;
+typedef  unsigned long long int ULong;
+typedef  signed char   Char;
+typedef  size_t        SizeT;
+
+
+//------------------------------------------------------------------//
+//---                           WordFM                           ---//
+//---                      Public interface                      ---//
+//------------------------------------------------------------------//
+
+typedef  struct _WordFM  WordFM; /* opaque */
+
+/* Initialise a WordFM */
+void initFM ( WordFM* t, 
+              void*   (*alloc_nofail)( SizeT ),
+              void    (*dealloc)(void*),
+              Word    (*kCmp)(Word,Word) );
+
+/* Allocate and initialise a WordFM */
+WordFM* newFM( void* (*alloc_nofail)( SizeT ),
+               void  (*dealloc)(void*),
+               Word  (*kCmp)(Word,Word) );
+
+/* Free up the FM.  If kFin is non-NULL, it is applied to keys
+   before the FM is deleted; ditto with vFin for vals. */
+void deleteFM ( WordFM*, void(*kFin)(Word), void(*vFin)(Word) );
+
+/* Add (k,v) to fm.  If a binding for k already exists, it is updated
+   to map to this new v.  In that case we should really return the
+   previous v so that caller can finalise it.  Oh well. */
+void addToFM ( WordFM* fm, Word k, Word v );
+
+// Delete key from fm, returning associated val if found
+Bool delFromFM ( WordFM* fm, /*OUT*/Word* oldV, Word key );
+
+// Look up in fm, assigning found val at spec'd address
+Bool lookupFM ( WordFM* fm, /*OUT*/Word* valP, Word key );
+
+Word sizeFM ( WordFM* fm );
+
+// set up FM for iteration
+void initIterFM ( WordFM* fm );
+
+// get next key/val pair.  Will assert if fm has been modified
+// or looked up in since initIterFM was called.
+Bool nextIterFM ( WordFM* fm, /*OUT*/Word* pKey, /*OUT*/Word* pVal );
+
+// clear the I'm iterating flag
+void doneIterFM ( WordFM* fm );
+
+// Deep copy a FM.  If dopyK is NULL, keys are copied verbatim.
+// If non-null, dopyK is applied to each key to generate the
+// version in the new copy.  In that case, if the argument to dopyK
+// is non-NULL but the result is NULL, it is assumed that dopyK
+// could not allocate memory, in which case the copy is abandoned
+// and NULL is returned.  Ditto with dopyV for values.
+WordFM* dopyFM ( WordFM* fm, Word(*dopyK)(Word), Word(*dopyV)(Word) );
+
+//------------------------------------------------------------------//
+//---                         end WordFM                         ---//
+//---                      Public interface                      ---//
+//------------------------------------------------------------------//
+
+
+static char* argv0 = "cg_merge";
+
+/* Keep track of source filename/line no so as to be able to
+   print decent error messages. */
+typedef
+   struct {
+      FILE* fp;
+      UInt  lno;
+      char* filename;
+   }
+   SOURCE;
+
+static void printSrcLoc ( SOURCE* s )
+{  // Reports lno-1; presumably because readline() bumps lno after each line -- confirm.
+   fprintf(stderr, "%s: near %s line %u\n", argv0, s->filename, s->lno-1);
+}
+
+__attribute__((noreturn))
+static void mallocFail ( SOURCE* s, char* who )
+{
+   fprintf(stderr, "%s: out of memory in %s\n", argv0, who );
+   printSrcLoc( s );   // say where in the input we were
+   exit(2);            // out-of-memory exits with status 2 (the other errors use 1)
+}
+
+__attribute__((noreturn))
+static void parseError ( SOURCE* s, char* msg )
+{
+   fprintf(stderr, "%s: parse error: %s\n", argv0, msg );
+   printSrcLoc( s );   // say where in the input we were
+   exit(1);            // never returns to the caller
+}
+
+__attribute__((noreturn))
+static void barf ( SOURCE* s, char* msg )
+{
+   fprintf(stderr, "%s: %s\n", argv0, msg );   // generic fatal error, no "parse error" tag
+   printSrcLoc( s );
+   exit(1);            // never returns to the caller
+}
+
+// Read a line
+#define M_LINEBUF 40960
+static char line[M_LINEBUF];
+
+// Reads the next line into 'line', minus its newline.  True if 'line' is non-empty.
+static Bool readline ( SOURCE* s )
+{
+   int ch, i = 0;
+   line[0] = 0;
+   while (1) {
+      if (i >= M_LINEBUF-10)   // stop well short of the end of line[]
+         parseError(s, "Unexpected long line in input file");
+      ch = getc(s->fp);
+      if (ch != EOF) {
+          line[i++] = ch;
+          line[i] = 0;         // keep the buffer NUL-terminated as it grows
+          if (ch == '\n') {
+             line[i-1] = 0;    // drop the newline itself
+             s->lno++;
+             break;
+          }
+      } else {
+         if (ferror(s->fp)) {
+            perror(argv0);
+            barf(s, "I/O error while reading input file");
+         } else {
+            // hit EOF; a final line lacking '\n' is still left in 'line'
+            break;
+         }
+      }
+   }
+   return line[0] != 0;   // NOTE(review): a blank line also yields False, like EOF
+}
+
+static Bool streqn ( char* s1, char* s2, size_t n )
+{
+   return strncmp(s1, s2, n) == 0;   // True iff strncmp sees no difference in n chars
+}
+
+static Bool streq ( char* s1, char* s2 )
+{
+   return strcmp(s1, s2) == 0;   // True iff the two strings are identical
+}
+
+
+////////////////////////////////////////////////////////////////
+
+typedef
+   struct {
+      char* fi_name;   // file name, written out after "fl="
+      char* fn_name;   // function name, written out after "fn="
+   }
+   FileFn;
+
+typedef
+   struct {
+      Int n_counts;    // number of entries in counts[]
+      ULong* counts;   // heap-allocated vector of n_counts values
+   }
+   Counts;
+
+typedef
+   struct {
+      // null-terminated vector of desc_lines
+      char** desc_lines;
+
+      // Cmd line
+      char* cmd_line;
+
+      // Events line, and the number of event names found on it
+      char* events_line;
+      Int   n_events;
+
+      // Summary line (copied from input)
+      char* summary_line;
+
+      /* Outermost map:  FileFn* (file+function key)  -->  innerMap,
+         where each innerMap is a WordFM mapping an unboxed UWord line
+         number to a Counts*. */
+      WordFM* outerMap;
+
+      // Summary counts (computed whilst parsing)
+      // should match .summary_line
+      Counts* summary;
+   }
+   CacheProfFile;
+
+static FileFn* new_FileFn ( char* file_name, char* fn_name )
+{
+   FileFn* result = malloc(sizeof(FileFn));   // NULL here means out of memory
+   if (result != NULL) {
+      result->fi_name = file_name;             // pointers are stored, not copied
+      result->fn_name = fn_name;
+   }
+   return result;
+}
+
+static void ddel_FileFn ( FileFn* ffn )
+{
+   // Deep delete: release both name strings, then the struct itself.
+   // free(NULL) is a no-op, so no explicit NULL checks are needed.
+   free(ffn->fi_name);
+   free(ffn->fn_name);
+   memset(ffn, 0, sizeof(FileFn));   // scrub before handing the block back
+   free(ffn);
+}
+
+static FileFn* dopy_FileFn ( FileFn* ff )
+{  // Deep copy of ff; returns NULL, leaking nothing, if any allocation fails.
+   char*   fi2 = strdup(ff->fi_name);
+   char*   fn2 = strdup(ff->fn_name);
+   FileFn* ff2 = (fi2 && fn2) ? new_FileFn( fi2, fn2 ) : NULL;
+   if (!ff2) { free(fi2); free(fn2); }   // free(NULL) is a no-op
+   return ff2;
+}
+
+static Counts* new_Counts ( Int n_counts, /*COPIED*/ULong* counts )
+{
+   // Returns a new Counts owning a copy of counts[0 .. n_counts-1], or NULL.
+   Counts* cts;
+   assert(n_counts >= 0);
+   cts = malloc(sizeof(Counts));
+   if (cts == NULL)
+      return NULL;
+   // Ask for at least one element: malloc(0) may legitimately return NULL.
+   cts->counts = malloc((n_counts > 0 ? n_counts : 1) * sizeof(ULong));
+   if (cts->counts == NULL) {
+      free(cts);   // don't leak the header when the vector can't be had
+      return NULL;
+   }
+   cts->n_counts = n_counts;
+   if (n_counts > 0) memcpy(cts->counts, counts, n_counts * sizeof(ULong));
+   return cts;
+}
+
+static Counts* new_Counts_Zeroed ( Int n_counts )
+{
+   // Returns a new Counts with n_counts entries, all zero, or NULL.
+   Counts* cts;
+   assert(n_counts >= 0);
+   cts = malloc(sizeof(Counts));
+   if (cts == NULL)
+      return NULL;
+   // calloc zero-fills; ask for at least one element because a
+   // zero-sized request may legitimately return NULL.
+   cts->counts = calloc(n_counts > 0 ? n_counts : 1, sizeof(ULong));
+   if (cts->counts == NULL) {
+      free(cts);   // don't leak the header when the vector can't be had
+      return NULL;
+   }
+   cts->n_counts = n_counts;
+   return cts;
+}
+
+static void sdel_Counts ( Counts* cts )
+{
+   memset(cts, 0, sizeof(Counts));   // shallow delete: cts->counts itself is not freed
+   free(cts);
+}
+
+static void ddel_Counts ( Counts* cts )
+{
+   // Deep delete: the counts vector goes too (free(NULL) is harmless).
+   free(cts->counts);
+   memset(cts, 0, sizeof(Counts));
+   free(cts);
+}
+
+static Counts* dopy_Counts ( Counts* cts )
+{
+   return new_Counts( cts->n_counts, cts->counts );   // deep copy; NULL if new_Counts fails
+}
+
+static
+CacheProfFile* new_CacheProfFile ( char**  desc_lines,
+                                   char*   cmd_line,
+                                   char*   events_line,
+                                   Int     n_events,
+                                   char*   summary_line,
+                                   WordFM* outerMap,
+                                   Counts* summary )
+{
+   CacheProfFile* result = malloc(sizeof(CacheProfFile));   // NULL: out of memory
+   if (result != NULL) {
+      result->desc_lines   = desc_lines;     // all members are stored as given,
+      result->cmd_line     = cmd_line;       // nothing is copied
+      result->events_line  = events_line;
+      result->n_events     = n_events;
+      result->summary_line = summary_line;
+      result->outerMap     = outerMap;
+      result->summary      = summary;
+   }
+   return result;
+}
+
+static WordFM* dopy_InnerMap ( WordFM* innerMap )
+{
+   return dopyFM ( innerMap, NULL,   // NULL dopyK: keys are copied verbatim
+                             (Word(*)(Word))dopy_Counts );   // values copied via dopy_Counts
+}
+
+static void ddel_InnerMap ( WordFM* innerMap )
+{
+   deleteFM( innerMap, NULL, (void(*)(Word))ddel_Counts );   // no key finaliser; vals via ddel_Counts
+}
+
+static void ddel_CacheProfFile ( CacheProfFile* cpf )
+{
+   // Deep delete: releases every member that is set, then the struct itself.
+   char** dl;
+   // desc_lines is a NULL-terminated vector of individually freed strings.
+   if (cpf->desc_lines) {
+      for (dl = cpf->desc_lines; *dl != NULL; dl++)
+         free(*dl);
+      free(cpf->desc_lines);
+   }
+
+   // free(NULL) is a no-op, so the plain strings need no guards.
+   free(cpf->cmd_line);
+   free(cpf->events_line);
+   free(cpf->summary_line);
+   if (cpf->outerMap)
+      deleteFM( cpf->outerMap, (void(*)(Word))ddel_FileFn,
+                               (void(*)(Word))ddel_InnerMap );
+   if (cpf->summary)
+      ddel_Counts(cpf->summary);
+   memset(cpf, 0, sizeof(CacheProfFile));
+   free(cpf);
+}
+
+static void showCounts ( FILE* f, Counts* c )
+{
+   Int i;   // writes every count followed by a single space
+   for (i = 0; i < c->n_counts; i++) {
+      fprintf(f, "%llu ", c->counts[i]);   // counts are ULong, i.e. unsigned
+   }
+}
+
+static void show_CacheProfFile ( FILE* f, CacheProfFile* cpf )
+{
+   Int     i;
+   char**  d;
+   FileFn* topKey;
+   WordFM* topVal;
+   UWord   subKey;
+   Counts* subVal;
+   // Header: the desc lines, then the cmd and events lines, as stored.
+   for (d = cpf->desc_lines; *d; d++)
+      fprintf(f, "%s\n", *d);
+   fprintf(f, "%s\n", cpf->cmd_line);
+   fprintf(f, "%s\n", cpf->events_line);
+   // Body: one fl=/fn= pair per outer key, then its per-line counts.
+   initIterFM( cpf->outerMap );
+   while (nextIterFM( cpf->outerMap, (Word*)(&topKey), (Word*)(&topVal) )) {
+      fprintf(f, "fl=%s\nfn=%s\n",
+                 topKey->fi_name, topKey->fn_name );
+      initIterFM( topVal );
+      while (nextIterFM( topVal, (Word*)(&subKey), (Word*)(&subVal) )) {
+         fprintf(f, "%lu   ", subKey );   // subKey is a UWord (unsigned long)
+         showCounts( f, subVal );
+         fprintf(f, "\n");
+      }
+      doneIterFM( topVal );
+   }
+   doneIterFM( cpf->outerMap );
+
+   // Emit the summary computed while parsing, not the copied summary_line.
+   fprintf(f, "summary:");
+   for (i = 0; i < cpf->summary->n_counts; i++)
+      fprintf(f, " %llu", cpf->summary->counts[i]);   // ULong, hence %llu
+   fprintf(f, "\n");
+}
+
+////////////////////////////////////////////////////////////////
+
+static Word cmp_FileFn ( Word s1, Word s2 )
+{
+   // Order by file name first; the function name only breaks ties.
+   FileFn* lhs = (FileFn*)s1;
+   FileFn* rhs = (FileFn*)s2;
+   Word byFile = strcmp(lhs->fi_name, rhs->fi_name);
+   return byFile != 0 ? byFile
+                      : (Word)strcmp(lhs->fn_name, rhs->fn_name);
+}
+
+static Word cmp_unboxed_UWord ( Word s1, Word s2 )
+{
+   // Keys are plain unsigned words stored directly in the Word slots.
+   UWord a = (UWord)s1;
+   UWord b = (UWord)s2;
+   // (a > b) and (a < b) are each 0 or 1, so this yields -1, 0 or 1.
+   return (a > b) - (a < b);
+}
+
+////////////////////////////////////////////////////////////////
+
+static Bool parse_ULong ( /*OUT*/ULong* res, /*INOUT*/char** pptr)
+{
+   // Skip whitespace, then parse a decimal number starting at *pptr.  *pptr
+   // is advanced past whatever was consumed, whether or not a number is found.
+   ULong u64 = 0;
+   char* ptr = *pptr;
+   while (isspace((unsigned char)*ptr)) ptr++;
+   if (!isdigit((unsigned char)*ptr)) {
+      *pptr = ptr;  /* lets the caller tell end-of-string from junk */
+      return False; /* end of string, or junk */
+   }
+   while (isdigit((unsigned char)*ptr)) {
+      u64 = (u64 * 10) + (ULong)(*ptr - '0');
+      ptr++;
+   }
+   *res = u64;
+   *pptr = ptr;
+   return True;
+}
+
+// str is a line of digits, starting with a line number.  Parse it,
+// returning the first number in *lnno and the rest in a newly
+// allocated Counts struct.  If lnno is non-NULL, treat the first
+// number as a line number and assign it to *lnno instead of
+// incorporating it in the counts array.
+static 
+Counts* splitUpCountsLine ( SOURCE* s, /*OUT*/UWord* lnno, char* str )
+{
+#define N_TMPC 50
+   Counts* counts;
+   ULong   tmpC[N_TMPC];
+   Int     n_tmpC = 0;
+   // Collect numbers until parse_ULong reports end-of-string or junk.
+   while (parse_ULong( &tmpC[n_tmpC], &str )) {
+      n_tmpC++;
+      if (n_tmpC >= N_TMPC)
+         barf(s, "N_TMPC too low.  Increase and recompile.");
+   }
+   if (*str != 0)
+      parseError(s, "garbage in counts line");
+   if (lnno ? (n_tmpC < 2) : (n_tmpC < 1))
+      parseError(s, "too few counts in count line");
+
+   if (lnno) {
+      // First number is the line number; the rest are the counts.
+      *lnno = (UWord)tmpC[0];
+      counts = new_Counts( n_tmpC-1, /*COPIED*/&tmpC[1] );
+   } else {
+      counts = new_Counts( n_tmpC, /*COPIED*/&tmpC[0] );
+   }
+   // new_Counts returns NULL when out of memory; report it rather than crash.
+   if (counts == NULL)
+      mallocFail(s, "splitUpCountsLine");
+   return counts;
+#undef N_TMPC
+}
+
+static void addCounts ( SOURCE* s, /*OUT*/Counts* counts1, Counts* counts2 )
+{
+   Int k = counts1->n_counts;
+   if (k != counts2->n_counts)
+      parseError(s, "addCounts: inconsistent number of counts");
+   while (k-- > 0)   // element-wise counts1 += counts2
+      counts1->counts[k] += counts2->counts[k];
+}
+
+static Bool addCountsToMap ( SOURCE* s,
+                             WordFM* counts_map, 
+                             UWord lnno, Counts* newCounts )
+{
+   Counts* oldCounts;
+   // Look up lnno in the map.  Returns True if newCounts was merged into an
+   // existing entry, False if newCounts itself was stored in the map.
+   if (lookupFM( counts_map, (Word*)(&oldCounts), (Word)lnno )) {
+      // merge with existing binding; the map does not keep newCounts
+      addCounts( s, oldCounts, newCounts );
+      return True;
+   } else {
+      // create new binding; the map now refers to newCounts
+      addToFM( counts_map, (Word)lnno, (Word)newCounts );
+      return False;
+   }
+}
+
+static
+void handle_counts ( SOURCE* s,
+                     CacheProfFile* cpf, 
+                     char* fi, char* fn, char* newCountsStr )
+{
+   // Parse one count line for (fi, fn), merge it into cpf->outerMap and
+   // add it to cpf->summary.  Exits via parseError/mallocFail on failure.
+   WordFM* countsMap;
+   Bool    freeNewCounts;
+   UWord   lnno;
+   Counts* newCounts;
+   FileFn* topKey; 
+
+   if (0)  printf("%s %s %s\n", fi, fn, newCountsStr );
+
+   // parse the numbers
+   newCounts = splitUpCountsLine( s, &lnno, newCountsStr );
+   if (newCounts == NULL)
+      mallocFail(s, "handle_counts(1)");
+
+   // Did we get the right number?
+   if (newCounts->n_counts != cpf->n_events)
+      parseError(s, "# counts doesn't match # events");
+
+   // allocate the key
+   topKey = malloc(sizeof(FileFn));
+   if (topKey) {
+      topKey->fi_name = strdup(fi);
+      topKey->fn_name = strdup(fn);
+   }
+   if (! (topKey && topKey->fi_name && topKey->fn_name))
+      mallocFail(s, "handle_counts:");
+
+   // search for it
+   if (lookupFM( cpf->outerMap, (Word*)(&countsMap), (Word)topKey )) {
+      // found it.  Merge in new counts
+      freeNewCounts = addCountsToMap( s, countsMap, lnno, newCounts );
+      ddel_FileFn(topKey);
+   } else {
+      // not found in the top map.  Create new entry
+      countsMap = newFM( malloc, free, cmp_unboxed_UWord );
+      if (!countsMap)
+         mallocFail(s, "handle_counts(2)");   // out of memory, not a parse error
+      addToFM( cpf->outerMap, (Word)topKey, (Word)countsMap );
+      freeNewCounts = addCountsToMap( s, countsMap, lnno, newCounts );
+   }
+
+   // also add to running summary total
+   addCounts( s, cpf->summary, newCounts );
+
+   // If the counts were merged into an existing entry rather than stored,
+   // the map holds no reference to newCounts, so it is safe to free it.
+   if (freeNewCounts)
+      ddel_Counts(newCounts);
+}
+
+
+/* Parse a complete file from the stream in 's'.  If a parse error
+   happens, do not return; instead exit via parseError().  If an
+   out-of-memory condition happens, do not return; instead exit via
+   mallocFail().
+*/
+static CacheProfFile* parse_CacheProfFile ( SOURCE* s )
+{
+#define M_TMP_DESCLINES 10
+
+   Int            i;
+   Bool           b;
+   char*          tmp_desclines[M_TMP_DESCLINES];
+   char*          p;
+   int            n_tmp_desclines = 0;
+   CacheProfFile* cpf;
+   Counts*        summaryRead; 
+   char*          curr_fn_init = "???";
+   char*          curr_fl_init = "???";
+   char*          curr_fn      = curr_fn_init;
+   char*          curr_fl      = curr_fl_init;
+
+   cpf = new_CacheProfFile( NULL, NULL, NULL, 0, NULL, NULL, NULL );
+   if (cpf == NULL)
+      mallocFail(s, "parse_CacheProfFile(1)");
+
+   // Parse "desc:" lines
+   while (1) {
+      b = readline(s);
+      if (!b) 
+         break;
+      if (!streqn(line, "desc: ", 6))
+         break;
+      if (n_tmp_desclines >= M_TMP_DESCLINES)
+         barf(s, "M_TMP_DESCLINES too low; increase and recompile");
+      tmp_desclines[n_tmp_desclines++] = strdup(line);
+   }
+
+   if (n_tmp_desclines == 0)
+      parseError(s, "parse_CacheProfFile: no DESC lines present");
+
+   cpf->desc_lines = malloc( (1+n_tmp_desclines) * sizeof(char*) );
+   if (cpf->desc_lines == NULL)
+      mallocFail(s, "parse_CacheProfFile(2)");
+
+   cpf->desc_lines[n_tmp_desclines] = NULL;
+   for (i = 0; i < n_tmp_desclines; i++)
+      cpf->desc_lines[i] = tmp_desclines[i];
+
+   // Parse "cmd:" line
+   if (!streqn(line, "cmd: ", 5))
+      parseError(s, "parse_CacheProfFile: no CMD line present");
+
+   cpf->cmd_line = strdup(line);
+   if (cpf->cmd_line == NULL)
+      mallocFail(s, "parse_CacheProfFile(3)");
+
+   // Parse "events:" line and figure out how many events there are
+   b = readline(s);
+   if (!b)
+      parseError(s, "parse_CacheProfFile: eof before EVENTS line");
+   if (!streqn(line, "events: ", 8))
+      parseError(s, "parse_CacheProfFile: no EVENTS line present");
+
+   // figure out how many events there are by counting the number
+   // of space-alphanum transitions in the events_line
+   cpf->events_line = strdup(line);
+   if (cpf->events_line == NULL)
+      mallocFail(s, "parse_CacheProfFile(3)");
+
+   cpf->n_events = 0;
+   assert(cpf->events_line[6] == ':');
+   for (p = &cpf->events_line[6]; *p; p++) {
+      if (p[0] == ' ' && isalpha(p[1]))
+         cpf->n_events++;
+   }
+
+   // create the running cross-check summary
+   cpf->summary = new_Counts_Zeroed( cpf->n_events );
+   if (cpf->summary == NULL)
+      mallocFail(s, "parse_CacheProfFile(4)");
+
+   // create the outer map (file+fn name --> inner map)
+   cpf->outerMap = newFM ( malloc, free, cmp_FileFn );
+   if (cpf->outerMap == NULL)
+      mallocFail(s, "parse_CacheProfFile(5)");
+
+   // process count lines
+   while (1) {
+      b = readline(s);
+      if (!b)
+         parseError(s, "parse_CacheProfFile: eof before SUMMARY line");
+
+      if (isdigit(line[0])) {
+         handle_counts(s, cpf, curr_fl, curr_fn, line);
+         continue;
+      }
+      else
+      if (streqn(line, "fn=", 3)) {
+         if (curr_fn != curr_fn_init)
+            free(curr_fn);
+         curr_fn = strdup(line+3);
+         continue;
+      }
+      else
+      if (streqn(line, "fl=", 3)) {
+         if (curr_fl != curr_fl_init)
+            free(curr_fl);
+         curr_fl = strdup(line+3);
+         continue;
+      }
+      else
+      if (streqn(line, "summary: ", 9)) {
+         break;
+      }
+      else
+         parseError(s, "parse_CacheProfFile: unexpected line in main data");
+   }
+
+   // finally, the "summary:" line
+   if (!streqn(line, "summary: ", 9))
+      parseError(s, "parse_CacheProfFile: missing SUMMARY line");
+
+   cpf->summary_line = strdup(line);
+   if (cpf->summary_line == NULL)
+      mallocFail(s, "parse_CacheProfFile(6)");
+
+   // there should be nothing more
+   b = readline(s);
+   if (b)
+      parseError(s, "parse_CacheProfFile: "
+                    "extraneous content after SUMMARY line");
+
+   // check the summary counts are as expected
+   summaryRead = splitUpCountsLine( s, NULL, &cpf->summary_line[8] );
+   if (summaryRead == NULL)
+      mallocFail(s, "parse_CacheProfFile(7)");
+   if (summaryRead->n_counts != cpf->n_events)
+      parseError(s, "parse_CacheProfFile: wrong # counts in SUMMARY line");
+   for (i = 0; i < summaryRead->n_counts; i++) {
+      if (summaryRead->counts[i] != cpf->summary->counts[i]) {
+         parseError(s, "parse_CacheProfFile: "
+                       "computed vs stated SUMMARY counts mismatch");
+      }
+   }
+   free(summaryRead->counts);
+   sdel_Counts(summaryRead);
+
+   // since the summary counts are OK, free up the summary_line text
+   // which contains the same info.
+   if (cpf->summary_line) {
+      free(cpf->summary_line);
+      cpf->summary_line = NULL;
+   }
+
+   if (curr_fn != curr_fn_init)
+      free(curr_fn);
+   if (curr_fl != curr_fl_init)
+      free(curr_fl);
+
+   // All looks OK
+   return cpf;
+
+#undef N_TMP_DESCLINES  
+}
+
+
+/* Fold the profile data of 'src' into 'dst'.  'src' is only read.
+   Exits via barf() if the two "events:" lines differ and via
+   mallocFail() if a copy cannot be allocated.
+
+   For each (filefn, innerMap) in src
+      if filefn not in dst
+         add binding dopy(filefn)->dopy(innerMap) to dst
+      else
+         for each (lineno, counts) in src's innerMap
+            if lineno not in dst's innerMap
+               add binding lineno->dopy(counts) to dst's innerMap
+            else
+               add counts into dst's innerMap[lineno]
+
+   Outer maps:  FileFn* -> WordFM* (inner map)
+   Inner maps:  UWord   -> Counts*
+*/
+static void merge_CacheProfInfo ( SOURCE* s,
+                                  /*MOD*/CacheProfFile* dst,
+                                  CacheProfFile* src )
+{
+   FileFn* srcFileFn;
+   WordFM* srcLines;
+   WordFM* dstLines;
+   UWord   lineNo;
+   Counts* srcCounts;
+   Counts* dstCounts;
+
+   /* Mundane precondition: both files must describe the same events. */
+   if (!streq( dst->events_line, src->events_line ))
+     barf(s, "\"events:\" line of most recent file does "
+             "not match those previously processed");
+
+   initIterFM( src->outerMap );
+
+   while (nextIterFM( src->outerMap, (Word*)&srcFileFn, (Word*)&srcLines )) {
+
+      if (! lookupFM( dst->outerMap, (Word*)&dstLines, (Word)srcFileFn )) {
+         /* dst has never seen this file/function: give it private
+            copies of both the key and the whole inner map. */
+         FileFn* keyCopy = dopy_FileFn(srcFileFn);
+         WordFM* mapCopy = dopy_InnerMap(srcLines);
+         if (keyCopy == NULL || mapCopy == NULL)
+            goto oom;
+         addToFM( dst->outerMap, (Word)keyCopy, (Word)mapCopy );
+         continue;
+      }
+
+      /* dst already has this file/function: merge line by line. */
+      initIterFM( srcLines );
+
+      while (nextIterFM( srcLines, (Word*)&lineNo, (Word*)&srcCounts )) {
+
+         if (lookupFM( dstLines, (Word*)&dstCounts, lineNo )) {
+            /* line present on both sides: accumulate in place */
+            addCounts( s, dstCounts, srcCounts );
+         } else {
+            /* line only in src: dst gets its own copy of the counts */
+            Counts* countsCopy = dopy_Counts( srcCounts );
+            if (countsCopy == NULL)
+               goto oom;
+            addToFM( dstLines, lineNo, (Word)countsCopy );
+         }
+      }
+   }
+
+   /* the running totals are merged as well */
+   addCounts(s, dst->summary, src->summary );
+
+   return;
+
+  oom:
+   mallocFail(s, "merge_CacheProfInfo");
+}
+
+/* Print a short description and the command-line synopsis to stderr,
+   then terminate the process with exit status 1. */
+static void usage ( void )
+{
+   const char* prog = argv0;
+
+   fprintf(stderr, "%s: Merges multiple cachegrind output files into one\n", 
+                   prog);
+   fprintf(stderr, "%s: usage: %s [-o outfile] [files-to-merge]\n", 
+                   prog, prog);
+   exit(1);
+}
+
+/* Entry point: parse every input file named on the command line, merge
+   them into the first one parsed, and write the result to the file
+   given with '-o' (or to stdout when no '-o' is present).  Progress
+   messages go to stderr.  Returns 0 on success; exits with status 1 on
+   usage or output errors. */
+int main ( int argc, char** argv )
+{
+   Int            i;
+   SOURCE         src;
+   CacheProfFile  *cpf, *cpfTmp;
+
+   FILE*          outfile = NULL;
+   char*          outfilename = NULL;
+   Int            outfileix = 0;   /* index of "-o" in argv; 0 == none */
+
+   if (argv[0])
+      argv0 = argv[0];
+
+   if (argc < 2)
+      usage();
+
+   for (i = 1; i < argc; i++) {
+      if (streq(argv[i], "-h") || streq(argv[i], "--help"))
+         usage();
+   }
+
+   /* Scan args, looking for '-o outfilename'. */
+   for (i = 1; i < argc; i++) {
+      if (streq(argv[i], "-o")) {
+         if (i+1 < argc) {
+            outfilename = argv[i+1];
+            outfileix   = i;
+            break;
+         } else {
+            usage();
+         }
+      }
+   }
+
+   cpf = NULL;
+
+   for (i = 1; i < argc; i++) {
+
+      if (i == outfileix) {
+         /* Skip '-o' and whatever follows it */
+         i += 1;
+         continue;
+      }
+
+      fprintf(stderr, "%s: parsing %s\n", argv0, argv[i]);
+      src.lno      = 1;
+      src.filename = argv[i];
+      src.fp       = fopen(src.filename, "r");
+      if (!src.fp) {
+         perror(argv0);
+         barf(&src, "Cannot open input file");
+      }
+      assert(src.fp);
+      cpfTmp = parse_CacheProfFile( &src );
+      fclose(src.fp);
+
+      /* If this isn't the first file, merge */
+      if (cpf == NULL) {
+         /* this is the first file */
+         cpf = cpfTmp;
+      } else {
+         /* not the first file; merge */
+         fprintf(stderr, "%s: merging %s\n", argv0, argv[i]);
+         merge_CacheProfInfo( &src, cpf, cpfTmp );
+         ddel_CacheProfFile( cpfTmp );
+      }
+
+   }
+
+   /* Now create the output file. */
+
+   if (cpf) {
+      /* Printable name of the destination; outfilename is NULL when
+         writing to stdout and must not be handed to "%s" directly. */
+      const char* outname = outfilename ? outfilename : "(stdout)";
+
+      fprintf(stderr, "%s: writing %s\n", argv0, outname);
+
+      /* Write the output. */
+      if (outfilename) {
+         outfile = fopen(outfilename, "w");
+         if (!outfile) {
+            fprintf(stderr, "%s: can't create output file %s\n", 
+                            argv0, outfilename);
+            perror(argv0);
+            exit(1);
+         }
+      } else {
+         outfile = stdout;
+      }
+
+      show_CacheProfFile( outfile, cpf );
+
+      /* Flush before testing the error indicator so that failures of
+         buffered writes are noticed too. */
+      if (fflush(outfile) != 0 || ferror(outfile)) {
+         fprintf(stderr, "%s: error writing output file %s\n", 
+                         argv0, outname);
+         perror(argv0);
+         if (outfile != stdout)
+            fclose(outfile);
+         exit(1);
+      }
+
+      /* fclose() can still report a deferred write error. */
+      if (outfile != stdout && fclose( outfile ) != 0) {
+         fprintf(stderr, "%s: error writing output file %s\n", 
+                         argv0, outname);
+         perror(argv0);
+         exit(1);
+      }
+
+      ddel_CacheProfFile( cpf );
+   }
+
+   return 0;
+}
+
+
+//------------------------------------------------------------------//
+//---                           WordFM                           ---//
+//---                       Implementation                       ---//
+//------------------------------------------------------------------//
+
+/* ------------ Implementation ------------ */
+
+/* One element of the AVL tree */
+typedef
+   struct _AvlNode {
+      Word key;                /* compared with the map's kCmp function */
+      Word val;                /* value bound to key */
+      struct _AvlNode* left;   /* taken by lookups when kCmp(key, k) > 0 */
+      struct _AvlNode* right;  /* taken by lookups when kCmp(key, k) < 0 */
+      Char balance;            /* -1, 0 or 1: decremented when the left
+                                  subtree deepens, incremented when the
+                                  right one does */
+   }
+   AvlNode;
+
+/* An optional Word: 'w' is only meaningful when 'b' is True.  Used by
+   avl_insert_wrk to hand back the value displaced by a duplicate key. */
+typedef 
+   struct {
+      Word w;
+      Bool b;
+   }
+   MaybeWord;
+
+/* Capacity of the iterator stack.  stackPush asserts if it would be
+   exceeded.  NOTE(review): the original remark was "At most 2**32
+   entries can be iterated over"; the real bound depends on the tree
+   height, so treat that figure as approximate. */
+#define WFM_STKMAX    32    // At most 2**32 entries can be iterated over
+
+/* A finite map from Word keys to Word values, stored as an AVL tree,
+   plus the state of its single built-in iterator. */
+struct _WordFM {
+   AvlNode* root;                      // NULL when the map is empty
+   void*    (*alloc_nofail)( SizeT );  // allocator for nodes and copies
+   void     (*dealloc)(void*);         // matching deallocator
+   Word     (*kCmp)(Word,Word);        // key ordering: <0, 0 or >0
+   AvlNode* nodeStack[WFM_STKMAX]; // Iterator node stack
+   Int      numStack[WFM_STKMAX];  // Iterator num stack
+   Int      stackTop;              // Iterator stack pointer, one past end
+}; 
+
+/* forward */
+static Bool avl_removeroot_wrk(AvlNode** t, Word(*kCmp)(Word,Word));
+
+/* Swing to the left: the right child of *root becomes the new subtree
+   root and the old root becomes its left child.  Warning: balance
+   fields are not maintained here. */
+static void avl_swl ( AvlNode** root )
+{
+   AvlNode* old_top = *root;
+   AvlNode* new_top = old_top->right;
+
+   old_top->right = new_top->left;
+   new_top->left  = old_top;
+   *root          = new_top;
+}
+
+/* Swing to the right: the left child of *root becomes the new subtree
+   root and the old root becomes its right child.  Warning: balance
+   fields are not maintained here. */
+static void avl_swr ( AvlNode** root )
+{
+   AvlNode* old_top = *root;
+   AvlNode* new_top = old_top->left;
+
+   old_top->left  = new_top->right;
+   new_top->right = old_top;
+   *root          = new_top;
+}
+
+/* Balance maintenance after a double swing: set the balance of both
+   children according to the balance the new root had, then mark the
+   root itself as balanced.  Asserts on a balance outside -1..1. */
+static void avl_nasty ( AvlNode* root )
+{
+   Char bal = root->balance;
+
+   if (bal == -1) {
+      root->left->balance  = 0;
+      root->right->balance = 1;
+   } else if (bal == 1) {
+      root->left->balance  = -1;
+      root->right->balance = 0;
+   } else if (bal == 0) {
+      root->left->balance  = 0;
+      root->right->balance = 0;
+   } else {
+      assert(0);
+   }
+   root->balance = 0;
+}
+
+/* Count the nodes of the tree rooted at nd, which must not be NULL. */
+static Word size_avl_nonNull ( AvlNode* nd )
+{
+   Word total = 1;   /* nd itself */
+
+   if (nd->left)
+      total += size_avl_nonNull(nd->left);
+   if (nd->right)
+      total += size_avl_nonNull(nd->right);
+   return total;
+}
+
+/* Insert element a into the AVL tree *rootp.  Returns True if the depth
+   of the tree has grown.  If element with that key is already present,
+   just copy a->val to existing node, first returning old ->val field
+   of existing node in *oldV, so that the caller can finalize it
+   however it wants.  In that duplicate case 'a' is not linked into the
+   tree and stays owned by the caller; oldV->b tells the two cases apart.
+*/
+static 
+Bool avl_insert_wrk ( AvlNode**         rootp, 
+                      /*OUT*/MaybeWord* oldV,
+                      AvlNode*          a, 
+                      Word              (*kCmp)(Word,Word) )
+{
+   Word cmpres;
+
+   /* initialize */
+   a->left    = 0;
+   a->right   = 0;
+   a->balance = 0;
+   oldV->b    = False;
+
+   /* insert into an empty tree? */
+   if (!(*rootp)) {
+      (*rootp) = a;
+      return True;
+   }
+ 
+   cmpres = kCmp( (*rootp)->key, a->key );
+
+   if (cmpres > 0) {
+      /* insert into the left subtree */
+      if ((*rootp)->left) {
+         AvlNode* left_subtree = (*rootp)->left;
+         if (avl_insert_wrk(&left_subtree, oldV, a, kCmp)) {
+            /* Left subtree got deeper.  The switch tests the balance
+               as it was before the decrement. */
+            switch ((*rootp)->balance--) {
+               case  1: return False;
+               case  0: return True;
+               case -1: break;
+               default: assert(0);
+            }
+            /* Was already left-heavy: rebalance with a single or a
+               double swing depending on the left child's balance. */
+            if ((*rootp)->left->balance < 0) {
+               avl_swr( rootp );
+               (*rootp)->balance = 0;
+               (*rootp)->right->balance = 0;
+            } else {
+               avl_swl( &((*rootp)->left) );
+               avl_swr( rootp );
+               avl_nasty( *rootp );
+            }
+         } else {
+            /* Depth unchanged; re-link in case the recursive call
+               replaced the subtree's root. */
+            (*rootp)->left = left_subtree;
+         }
+         return False;
+      } else {
+         /* No left child yet: hang 'a' directly.  Depth grows only if
+            the node was balanced (balance 0) before. */
+         (*rootp)->left = a;
+         if ((*rootp)->balance--) 
+            return False;
+         return True;
+      }
+      assert(0);/*NOTREACHED*/
+   }
+   else 
+   if (cmpres < 0) {
+      /* insert into the right subtree */
+      if ((*rootp)->right) {
+         AvlNode* right_subtree = (*rootp)->right;
+         if (avl_insert_wrk(&right_subtree, oldV, a, kCmp)) {
+            /* Mirror image of the left-hand case above. */
+            switch((*rootp)->balance++){
+               case -1: return False;
+               case  0: return True;
+               case  1: break;
+               default: assert(0);
+            }
+            if ((*rootp)->right->balance > 0) {
+               avl_swl( rootp );
+               (*rootp)->balance = 0;
+               (*rootp)->left->balance = 0;
+            } else {
+               avl_swr( &((*rootp)->right) );
+               avl_swl( rootp );
+               avl_nasty( *rootp );
+            }
+         } else {
+            (*rootp)->right = right_subtree;
+         }
+         return False;
+      } else {
+         (*rootp)->right = a;
+         if ((*rootp)->balance++) 
+            return False;
+         return True;
+      }
+      assert(0);/*NOTREACHED*/
+   }
+   else {
+      /* cmpres == 0, a duplicate - replace the val, but don't
+         incorporate the node in the tree */
+      oldV->b = True;
+      oldV->w = (*rootp)->val;
+      (*rootp)->val = a->val;
+      return False;
+   }
+}
+
+/* Remove an element a from the AVL tree *rootp.  a must be part of
+   the tree.  Returns True if the depth of the tree has shrunk, False
+   otherwise.  The node itself is only unlinked, never freed; that is
+   left to the caller.
+*/
+static
+Bool avl_remove_wrk ( AvlNode** rootp, 
+                      AvlNode*  a, 
+                      Word(*kCmp)(Word,Word) )
+{
+   Bool ch;
+   Word cmpres = kCmp( (*rootp)->key, a->key );
+
+   if (cmpres > 0){
+      /* remove from the left subtree */
+      AvlNode* left_subtree = (*rootp)->left;
+      assert(left_subtree);
+      ch = avl_remove_wrk(&left_subtree, a, kCmp);
+      (*rootp)->left=left_subtree;
+      if (ch) {
+         /* Left side got shallower.  The switch tests the balance as
+            it was before the increment. */
+         switch ((*rootp)->balance++) {
+            case -1: return True;
+            case  0: return False;
+            case  1: break;
+            default: assert(0);
+         }
+         /* Was already right-heavy: rebalance. */
+         switch ((*rootp)->right->balance) {
+            case 0:
+               avl_swl( rootp );
+               (*rootp)->balance = -1;
+               (*rootp)->left->balance = 1;
+               return False;
+            case 1: 
+               avl_swl( rootp );
+               (*rootp)->balance = 0;
+               (*rootp)->left->balance = 0;
+               /* depth shrank; matches the mirrored case below */
+               return True;
+            case -1:
+               break;
+            default:
+               assert(0);
+         }
+         avl_swr( &((*rootp)->right) );
+         avl_swl( rootp );
+         avl_nasty( *rootp );
+         return True;
+      }
+   }
+   else
+   if (cmpres < 0) {
+      /* remove from the right subtree */
+      AvlNode* right_subtree = (*rootp)->right;
+      assert(right_subtree);
+      ch = avl_remove_wrk(&right_subtree, a, kCmp);
+      (*rootp)->right = right_subtree;
+      if (ch) {
+         switch ((*rootp)->balance--) {
+            case  1: return True;
+            case  0: return False;
+            case -1: break;
+            default: assert(0);
+         }
+         /* Was already left-heavy: rebalance. */
+         switch ((*rootp)->left->balance) {
+            case 0:
+               avl_swr( rootp );
+               (*rootp)->balance = 1;
+               (*rootp)->right->balance = -1;
+               return False;
+            case -1:
+               avl_swr( rootp );
+               (*rootp)->balance = 0;
+               (*rootp)->right->balance = 0;
+               return True;
+            case 1:
+               break;
+            default:
+               assert(0);
+         }
+         avl_swl( &((*rootp)->left) );
+         avl_swr( rootp );
+         avl_nasty( *rootp );
+         return True;
+      }
+   }
+   else {
+      assert(cmpres == 0);
+      assert((*rootp)==a);
+      return avl_removeroot_wrk(rootp, kCmp);
+   }
+   /* the subtree's depth did not change */
+   return False;
+}
+
+/* Remove the root of the AVL tree *rootp.
+ * Warning: dumps core if *rootp is empty
+ *
+ * Returns True if the depth of the tree has shrunk.  The removed node
+ * is only unlinked from the tree; it is not freed here.
+ */
+static 
+Bool avl_removeroot_wrk ( AvlNode** rootp, 
+                          Word(*kCmp)(Word,Word) )
+{
+   Bool     ch;
+   AvlNode* a;
+   /* Easy cases: at most one child, which simply takes the root's place. */
+   if (!(*rootp)->left) {
+      if (!(*rootp)->right) {
+         (*rootp) = 0;
+         return True;
+      }
+      (*rootp) = (*rootp)->right;
+      return True;
+   }
+   if (!(*rootp)->right) {
+      (*rootp) = (*rootp)->left;
+      return True;
+   }
+   /* Two children: pick a replacement from the deeper side, namely the
+      rightmost node of the left subtree or the leftmost node of the
+      right subtree. */
+   if ((*rootp)->balance < 0) {
+      /* remove from the left subtree */
+      a = (*rootp)->left;
+      while (a->right) a = a->right;
+   } else {
+      /* remove from the right subtree */
+      a = (*rootp)->right;
+      while (a->left) a = a->left;
+   }
+   /* Unlink the replacement (this may rebalance and change *rootp),
+      then let it take over the links and balance of the current root. */
+   ch = avl_remove_wrk(rootp, a, kCmp);
+   a->left    = (*rootp)->left;
+   a->right   = (*rootp)->right;
+   a->balance = (*rootp)->balance;
+   (*rootp)   = a;
+   if(a->balance == 0) return ch;
+   return False;
+}
+
+/* Search the tree rooted at t for a node whose key compares equal to k
+   under kCmp.  Returns that node, or NULL if there is none. */
+static 
+AvlNode* avl_find_node ( AvlNode* t, Word k, Word(*kCmp)(Word,Word) )
+{
+   while (t != NULL) {
+      Word order = kCmp(t->key, k);
+      if (order == 0)
+         return t;
+      /* node key greater than k: go left, otherwise go right */
+      t = (order > 0) ? t->left : t->right;
+   }
+   return NULL;
+}
+
+// Reset the iterator stack: wipe every slot and mark it empty.
+static void stackClear(WordFM* fm)
+{
+   Int slot;
+   assert(fm);
+   fm->stackTop = 0;
+   for (slot = WFM_STKMAX - 1; slot >= 0; slot--) {
+      fm->numStack[slot]  = 0;
+      fm->nodeStack[slot] = NULL;
+   }
+}
+
+// Push (n, i) onto the iterator stack.  i is the visit phase and must
+// be 1, 2 or 3; asserts if the stack is already full.
+static inline void stackPush(WordFM* fm, AvlNode* n, Int i)
+{
+   Int top = fm->stackTop;
+   assert(top < WFM_STKMAX);
+   assert(i >= 1 && i <= 3);
+   fm->nodeStack[top] = n;
+   fm-> numStack[top] = i;
+   fm->stackTop       = top + 1;
+}
+
+// Pop the top (node, phase) pair into *n and *i and clear the vacated
+// slot.  Returns False, leaving *n and *i untouched, if the stack is
+// empty.
+static inline Bool stackPop(WordFM* fm, AvlNode** n, Int* i)
+{
+   Int top;
+
+   assert(fm->stackTop <= WFM_STKMAX);
+
+   if (fm->stackTop <= 0)
+      return False;
+
+   top = --fm->stackTop;
+   *n  = fm->nodeStack[top];
+   *i  = fm-> numStack[top];
+   assert(1 <= *i && *i <= 3);
+   fm->nodeStack[top] = NULL;
+   fm-> numStack[top] = 0;
+   return True;
+}
+
+/* Recursively duplicate the tree rooted at nd.  Keys are copied with
+   dopyK and values with dopyV; a NULL copier means the Word is copied
+   as is.  Returns the new root, or NULL if nd is NULL or if a copier
+   turned a non-zero Word into zero (taken to mean out of memory).
+   NOTE(review): nodes allocated before such a failure are not released
+   here, since no deallocator is passed in. */
+static 
+AvlNode* avl_dopy ( AvlNode* nd, 
+                    Word(*dopyK)(Word), 
+                    Word(*dopyV)(Word),
+                    void*(alloc_nofail)(SizeT) )
+{
+   AvlNode* copy;
+
+   if (nd == NULL)
+      return NULL;
+
+   copy = alloc_nofail(sizeof(AvlNode));
+   assert(copy);
+
+   copy->balance = nd->balance;
+   copy->left    = NULL;
+   copy->right   = NULL;
+
+   /* Key: deep copy if a copier was supplied, else treat as unboxed */
+   if (dopyK == NULL) {
+      copy->key = nd->key;
+   } else {
+      copy->key = dopyK( nd->key );
+      if (copy->key == 0 && nd->key != 0)
+         return NULL; /* oom in key dcopy */
+   }
+
+   /* Value: same policy as for the key */
+   if (dopyV == NULL) {
+      copy->val = nd->val;
+   } else {
+      copy->val = dopyV( nd->val );
+      if (copy->val == 0 && nd->val != 0)
+         return NULL; /* oom in val dcopy */
+   }
+
+   /* Subtrees, left before right */
+   if (nd->left) {
+      copy->left = avl_dopy( nd->left, dopyK, dopyV, alloc_nofail );
+      if (copy->left == NULL)
+         return NULL;
+   }
+   if (nd->right) {
+      copy->right = avl_dopy( nd->right, dopyK, dopyV, alloc_nofail );
+      if (copy->right == NULL)
+         return NULL;
+   }
+
+   return copy;
+}
+
+/* --- Public interface functions --- */
+
+/* Set up caller-provided storage as an empty WordFM that uses the given
+   allocator, deallocator and key comparison function. */
+void initFM ( WordFM* fm,
+              void*   (*alloc_nofail)( SizeT ),
+              void    (*dealloc)(void*),
+              Word    (*kCmp)(Word,Word) )
+{
+   /* callbacks */
+   fm->alloc_nofail = alloc_nofail;
+   fm->dealloc      = dealloc;
+   fm->kCmp         = kCmp;
+   /* empty tree, idle iterator */
+   fm->root         = 0;
+   fm->stackTop     = 0;
+}
+
+/* Obtain a WordFM from alloc_nofail and initialise it as an empty map.
+   Asserts if the allocator returns NULL. */
+WordFM* newFM( void* (*alloc_nofail)( SizeT ),
+               void  (*dealloc)(void*),
+               Word  (*kCmp)(Word,Word) )
+{
+   WordFM* fresh;
+
+   fresh = alloc_nofail(sizeof(WordFM));
+   assert(fresh);
+   initFM(fresh, alloc_nofail, dealloc, kCmp);
+   return fresh;
+}
+
+/* Release the tree rooted at nd, children before parent.  kFin and
+   vFin, when non-NULL, are applied to each key and value first; every
+   node is zeroed before being handed to dealloc. */
+static void avl_free ( AvlNode* nd, 
+                       void(*kFin)(Word),
+                       void(*vFin)(Word),
+                       void(*dealloc)(void*) )
+{
+   if (nd == NULL)
+      return;
+
+   /* a NULL child is a no-op thanks to the guard above */
+   avl_free(nd->left, kFin, vFin, dealloc);
+   avl_free(nd->right, kFin, vFin, dealloc);
+
+   if (kFin != NULL)
+      kFin( nd->key );
+   if (vFin != NULL)
+      vFin( nd->val );
+
+   memset(nd, 0, sizeof(*nd));
+   dealloc(nd);
+}
+
+/* Destroy fm and everything in it.  If kFin is non-NULL it is applied
+   to each key before the node is released; likewise vFin for values. */
+void deleteFM ( WordFM* fm, void(*kFin)(Word), void(*vFin)(Word) )
+{
+   /* fetch the deallocator first: the memset below wipes fm->dealloc */
+   void(*release)(void*) = fm->dealloc;
+
+   avl_free( fm->root, kFin, vFin, release );
+   memset(fm, 0, sizeof(*fm));
+   release(fm);
+}
+
+/* Add (k,v) to fm.  If k is already bound, its value is replaced by v;
+   the previous value is not finalised here, so the caller remains
+   responsible for anything it referred to. */
+void addToFM ( WordFM* fm, Word k, Word v )
+{
+   MaybeWord oldV;
+   AvlNode* node;
+   node = fm->alloc_nofail( sizeof(struct _AvlNode) );
+   /* same policy as newFM/dopyFM: the allocator is not expected to fail */
+   assert(node);
+   node->key = k;
+   node->val = v;
+   oldV.b = False;
+   oldV.w = 0;
+   avl_insert_wrk( &fm->root, &oldV, node, fm->kCmp );
+   //if (oldV.b && fm->vFin)
+   //   fm->vFin( oldV.w );
+   /* On a duplicate key the new node was not linked into the tree, so
+      give it back through the map's own deallocator, which pairs with
+      alloc_nofail. */
+   if (oldV.b)
+      fm->dealloc(node);
+}
+
+// Remove the binding for key from fm.  Returns False if there is none.
+// Otherwise stores the bound value in *oldV (when oldV is non-NULL),
+// releases the node and returns True.
+Bool delFromFM ( WordFM* fm, /*OUT*/Word* oldV, Word key )
+{
+   AvlNode* victim = avl_find_node( fm->root, key, fm->kCmp );
+
+   if (victim == NULL)
+      return False;
+
+   avl_remove_wrk( &fm->root, victim, fm->kCmp );
+   if (oldV != NULL)
+      *oldV = victim->val;
+   fm->dealloc(victim);
+   return True;
+}
+
+// Find key in fm.  Returns True and, when valP is non-NULL, stores the
+// bound value in *valP; returns False if key is not present.
+Bool lookupFM ( WordFM* fm, /*OUT*/Word* valP, Word key )
+{
+   AvlNode* hit = avl_find_node( fm->root, key, fm->kCmp );
+
+   if (hit == NULL)
+      return False;
+
+   if (valP != NULL)
+      *valP = hit->val;
+   return True;
+}
+
+// Number of bindings in fm.  Walks the whole tree on every call, so the
+// cost grows with the size of the map.
+Word sizeFM ( WordFM* fm )
+{
+   if (fm->root == NULL)
+      return 0;
+   return size_avl_nonNull( fm->root );
+}
+
+// Prepare fm for iteration: empty the iterator stack and, if the map is
+// non-empty, seed it with the root in phase 1.
+void initIterFM ( WordFM* fm )
+{
+   assert(fm);
+   stackClear(fm);
+   if (fm->root == NULL)
+      return;
+   stackPush(fm, fm->root, 1);
+}
+
+// Fetch the next key/val pair of the iteration started by initIterFM,
+// in key order.  Returns True and fills *pKey / *pVal (each only if
+// non-NULL), or returns False once the iteration is exhausted.
+// NOTE(review): nothing here detects changes to fm made during an
+// iteration, so callers presumably must not modify the map meanwhile.
+Bool nextIterFM ( WordFM* fm, /*OUT*/Word* pKey, /*OUT*/Word* pVal )
+{
+   AvlNode* cur   = NULL;
+   Int      phase = 0;
+
+   assert(fm);
+
+   // In-order walk with an explicit stack.  Each node passes through
+   // three phases: 1 = descend into the left subtree, 2 = yield the
+   // node itself, 3 = descend into the right subtree.
+   while (stackPop(fm, &cur, &phase)) {
+      if (phase == 2) {
+         stackPush(fm, cur, 3);
+         if (pKey) *pKey = cur->key;
+         if (pVal) *pVal = cur->val;
+         return True;
+      }
+      if (phase == 1) {
+         stackPush(fm, cur, 2);
+         if (cur->left)
+            stackPush(fm, cur->left, 1);
+      } else if (phase == 3) {
+         if (cur->right)
+            stackPush(fm, cur->right, 1);
+      } else {
+         assert(0);
+      }
+   }
+
+   // Stack empty: nothing left to visit
+   return False;
+}
+
+// clear the I'm iterating flag
+// In this implementation the body is empty: there is no such flag to
+// clear, so calling it has no effect.
+void doneIterFM ( WordFM* fm )
+{
+}
+
+/* Make a deep copy of fm.  Keys are copied with dopyK and values with
+   dopyV; a NULL copier means the Words are copied as is.  The copy
+   shares fm's allocator, deallocator and comparison function.  Must
+   not be called while fm is being iterated over.  Returns the new map,
+   or NULL if copying the tree failed. */
+WordFM* dopyFM ( WordFM* fm, Word(*dopyK)(Word), Word(*dopyV)(Word) )
+{
+   WordFM* nyu; 
+
+   /* can't clone the fm whilst iterating on it */
+   assert(fm->stackTop == 0);
+
+   nyu = fm->alloc_nofail( sizeof(WordFM) );
+   assert(nyu);
+
+   *nyu = *fm;
+
+   /* The copy starts with its own, empty iterator state; whatever was
+      left in fm's stack slots must not be inherited. */
+   nyu->stackTop = 0;
+   memset(nyu->nodeStack, 0, sizeof(nyu->nodeStack));
+   memset(nyu->numStack, 0,  sizeof(nyu->numStack));
+
+   if (nyu->root) {
+      nyu->root = avl_dopy( nyu->root, dopyK, dopyV, fm->alloc_nofail );
+      if (! nyu->root) {
+         /* don't leak the half-built map header on failure */
+         fm->dealloc(nyu);
+         return NULL;
+      }
+   }
+
+   return nyu;
+}
+
+//------------------------------------------------------------------//
+//---                         end WordFM                         ---//
+//---                       Implementation                       ---//
+//------------------------------------------------------------------//
+
+/*--------------------------------------------------------------------*/
+/*--- end                                               cg_merge.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg_sim.c b/cachegrind/cg_sim.c
new file mode 100644
index 0000000..57abdfc
--- /dev/null
+++ b/cachegrind/cg_sim.c
@@ -0,0 +1,198 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Cache simulation                                    cg_sim.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2009 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Notes:
+  - simulates a write-allocate cache
+  - (block --> set) hash function uses simple bit selection
+  - handling of references straddling two cache blocks:
+      - counts as only one cache access (not two)
+      - both blocks hit                  --> one hit
+      - one block hits, the other misses --> one miss
+      - both blocks miss                 --> one miss (not two)
+*/
+
+typedef struct {                        /* one simulated cache */
+   Int          size;                   /* bytes */
+   Int          assoc;                  /* ways per set; 1 == direct-mapped */
+   Int          line_size;              /* bytes */
+   Int          sets;                   /* (size / line_size) / assoc */
+   Int          sets_min_1;             /* sets - 1, used as a bit mask */
+   Int          line_size_bits;         /* log2(line_size) */
+   Int          tag_shift;              /* line_size_bits + log2(sets) */
+   Char         desc_line[128];         /* human-readable description */
+   UWord*       tags;                   /* sets * assoc entries */
+} cache_t2;
+
+/* Set up *c from config, whose size/assoc/line_size are already checked. */
+static void cachesim_initcache(cache_t config, cache_t2* c)
+{
+   Int k, n_tags;
+
+   c->line_size = config.line_size;
+   c->assoc     = config.assoc;
+   c->size      = config.size;
+
+   c->line_size_bits = VG_(log2)(c->line_size);
+   c->sets           = (c->size / c->line_size) / c->assoc;
+   c->sets_min_1     = c->sets - 1;
+   c->tag_shift      = VG_(log2)(c->sets) + c->line_size_bits;
+
+   if (c->assoc != 1) {
+      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
+                                 c->size, c->line_size, c->assoc);
+   } else {
+      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", 
+                                 c->size, c->line_size);
+   }
+
+   n_tags  = c->sets * c->assoc;   /* one tag per line in the cache */
+   c->tags = VG_(malloc)("cg.sim.ci.1",
+                         sizeof(UWord) * c->sets * c->assoc);
+   for (k = 0; k < n_tags; k++)
+      c->tags[k] = 0;
+}
+
+/* This is done as a macro rather than by passing in the cache_t2 as an 
+ * arg because it slows things down by a small amount (3-5%) due to all 
+ * that extra indirection. */
+
+#define CACHESIM(L, MISS_TREATMENT)                                         \
+/* The cache and associated bits and pieces. */                             \
+static cache_t2 L;                                                          \
+                                                                            \
+static void cachesim_##L##_initcache(cache_t config)                        \
+{                                                                           \
+    cachesim_initcache(config, &L);                                         \
+}                                                                           \
+                                                                            \
+/* This attribute forces GCC to inline this function, even though it's */   \
+/* bigger than its usual limit.  Inlining gains around 5--10% speedup. */   \
+__attribute__((always_inline))                                              \
+static __inline__                                                           \
+void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2)         \
+{                                                                           \
+   UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);          \
+   UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);          \
+   UWord tag  = a >> L.tag_shift;                                           \
+   UWord tag2;                                                              \
+   Int i, j;                                                                \
+   Bool is_miss = False;                                                    \
+   UWord* set;                                                              \
+                                                                            \
+   /* First case: word entirely within line. */                             \
+   if (set1 == set2) {                                                      \
+                                                                            \
+      set = &(L.tags[set1 * L.assoc]);                                      \
+                                                                            \
+      /* This loop is unrolled for just the first case, which is the most */\
+      /* common.  We can't unroll any further because it would screw up   */\
+      /* if we have a direct-mapped (1-way) cache.                        */\
+      if (tag == set[0]) {                                                  \
+         return;                                                            \
+      }                                                                     \
+      /* If the tag is one other than the MRU, move it into the MRU spot  */\
+      /* and shuffle the rest down.                                       */\
+      for (i = 1; i < L.assoc; i++) {                                       \
+         if (tag == set[i]) {                                               \
+            for (j = i; j > 0; j--) {                                       \
+               set[j] = set[j - 1];                                         \
+            }                                                               \
+            set[0] = tag;                                                   \
+            return;                                                         \
+         }                                                                  \
+      }                                                                     \
+                                                                            \
+      /* A miss;  install this tag as MRU, shuffle rest down. */            \
+      for (j = L.assoc - 1; j > 0; j--) {                                   \
+         set[j] = set[j - 1];                                               \
+      }                                                                     \
+      set[0] = tag;                                                         \
+      MISS_TREATMENT;                                                       \
+      return;                                                               \
+                                                                            \
+   /* Second case: word straddles two lines. */                             \
+   /* Nb: this is a fast way of doing ((set1+1) % L.sets) */                \
+   } else if (((set1 + 1) & (L.sets-1)) == set2) {                          \
+      set = &(L.tags[set1 * L.assoc]);                                      \
+      if (tag == set[0]) {                                                  \
+         goto block2;                                                       \
+      }                                                                     \
+      for (i = 1; i < L.assoc; i++) {                                       \
+         if (tag == set[i]) {                                               \
+            for (j = i; j > 0; j--) {                                       \
+               set[j] = set[j - 1];                                         \
+            }                                                               \
+            set[0] = tag;                                                   \
+            goto block2;                                                    \
+         }                                                                  \
+      }                                                                     \
+      for (j = L.assoc - 1; j > 0; j--) {                                   \
+         set[j] = set[j - 1];                                               \
+      }                                                                     \
+      set[0] = tag;                                                         \
+      is_miss = True;                                                       \
+block2:                                                                     \
+      set = &(L.tags[set2 * L.assoc]);                                      \
+      tag2 = (a+size-1) >> L.tag_shift;                                     \
+      if (tag2 == set[0]) {                                                 \
+         goto miss_treatment;                                               \
+      }                                                                     \
+      for (i = 1; i < L.assoc; i++) {                                       \
+         if (tag2 == set[i]) {                                              \
+            for (j = i; j > 0; j--) {                                       \
+               set[j] = set[j - 1];                                         \
+            }                                                               \
+            set[0] = tag2;                                                  \
+            goto miss_treatment;                                            \
+         }                                                                  \
+      }                                                                     \
+      for (j = L.assoc - 1; j > 0; j--) {                                   \
+         set[j] = set[j - 1];                                               \
+      }                                                                     \
+      set[0] = tag2;                                                        \
+      is_miss = True;                                                       \
+miss_treatment:                                                             \
+      if (is_miss) { MISS_TREATMENT; }                                      \
+                                                                            \
+   } else {                                                                 \
+       VG_(printf)("addr: %lx  size: %u  sets: %d %d", a, size, set1, set2);\
+       VG_(tool_panic)("item straddles more than two cache sets");          \
+   }                                                                        \
+   return;                                                                  \
+}
+
+CACHESIM(L2, (*m2)++ );  /* L2 miss: bump *m2. NOTE(review): presumably this expansion defines cachesim_L2_doref used below -- confirm */
+CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );  /* I1 miss: bump *m1, then run the same reference through cachesim_L2_doref */
+CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );  /* D1 miss: bump *m1, then run the same reference through cachesim_L2_doref */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                 cg_sim.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/cachegrind/docs/.svn/dir-prop-base b/cachegrind/docs/.svn/dir-prop-base
new file mode 100644
index 0000000..585381a
--- /dev/null
+++ b/cachegrind/docs/.svn/dir-prop-base
@@ -0,0 +1,6 @@
+K 10
+svn:ignore
+V 20
+Makefile.in
+Makefile
+END
diff --git a/cachegrind/docs/.svn/entries b/cachegrind/docs/.svn/entries
new file mode 100644
index 0000000..f39a657
--- /dev/null
+++ b/cachegrind/docs/.svn/entries
@@ -0,0 +1,53 @@
+8
+
+dir
+9703
+svn://svn.valgrind.org/valgrind/trunk/cachegrind/docs
+svn://svn.valgrind.org/valgrind
+
+
+
+2009-01-26T22:56:14.413264Z
+9080
+weidendo
+has-props
+
+svn:special svn:externals svn:needs-lock
+
+
+
+
+
+
+
+
+
+
+
+a5019735-40e9-0310-863c-91ae7b9d1cf9
+
+cg-manual.xml
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+a11096bb815b2331149519251cb7357e
+2009-01-26T22:56:14.413264Z
+9080
+weidendo
+
+Makefile.am
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+0bd39a84244289a3ff06a439379b67ff
+2006-10-21T23:18:57.360258Z
+6336
+njn
+has-props
+
diff --git a/cachegrind/docs/.svn/format b/cachegrind/docs/.svn/format
new file mode 100644
index 0000000..45a4fb7
--- /dev/null
+++ b/cachegrind/docs/.svn/format
@@ -0,0 +1 @@
+8
diff --git a/cachegrind/docs/.svn/prop-base/Makefile.am.svn-base b/cachegrind/docs/.svn/prop-base/Makefile.am.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/docs/.svn/prop-base/Makefile.am.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/docs/.svn/text-base/Makefile.am.svn-base b/cachegrind/docs/.svn/text-base/Makefile.am.svn-base
new file mode 100644
index 0000000..8e61709
--- /dev/null
+++ b/cachegrind/docs/.svn/text-base/Makefile.am.svn-base
@@ -0,0 +1 @@
+EXTRA_DIST = cg-manual.xml
diff --git a/cachegrind/docs/.svn/text-base/cg-manual.xml.svn-base b/cachegrind/docs/.svn/text-base/cg-manual.xml.svn-base
new file mode 100644
index 0000000..512eeb4
--- /dev/null
+++ b/cachegrind/docs/.svn/text-base/cg-manual.xml.svn-base
@@ -0,0 +1,1349 @@
+<?xml version="1.0"?> <!-- -*- sgml -*- -->
+<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+  "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"
+[ <!ENTITY % vg-entities SYSTEM "../../docs/xml/vg-entities.xml"> %vg-entities; ]>
+
+
+<chapter id="cg-manual" xreflabel="Cachegrind: a cache-miss profiler">
+<title>Cachegrind: a cache and branch profiler</title>
+
+<sect1 id="cg-manual.cache" xreflabel="Cache profiling">
+<title>Cache and branch profiling</title>
+
+<para>To use this tool, you must specify
+<computeroutput>--tool=cachegrind</computeroutput> on the
+Valgrind command line.</para>
+
+<para>Cachegrind is a tool for finding places where programs
+interact badly with typical modern superscalar processors
+and run slowly as a result.
+In particular, it will do a cache simulation of your program,
+and optionally a branch-predictor simulation, and can
+then annotate your source line-by-line with the number of cache
+misses and branch mispredictions.  The following statistics are 
+collected:</para>
+<itemizedlist>
+  <listitem>
+    <para>L1 instruction cache reads and misses;</para>
+  </listitem>
+  <listitem>
+    <para>L1 data cache reads and read misses, writes and write
+    misses;</para>
+  </listitem>
+  <listitem>
+    <para>L2 unified cache reads and read misses, writes and
+    write misses.</para>
+  </listitem>
+  <listitem>
+    <para>Conditional branches and mispredicted conditional branches.</para>
+  </listitem>
+  <listitem>
+    <para>Indirect branches and mispredicted indirect branches.  An
+    indirect branch is a jump or call to a destination only known at
+    run time.</para>
+  </listitem>
+</itemizedlist>
+
+<para>On a modern machine, an L1 miss will typically cost
+around 10 cycles, an L2 miss can cost as much as 200
+cycles, and a mispredicted branch costs in the region of 10
+to 30 cycles.  Detailed cache and branch profiling can be very useful
+for improving the performance of your program.</para>
+
+<para>Also, since one instruction cache read is performed per
+instruction executed, you can find out how many instructions are
+executed per line, which can be useful for traditional profiling
+and test coverage.</para>
+
+<para>Branch profiling is not enabled by default.  To use it, you must
+additionally specify <computeroutput>--branch-sim=yes</computeroutput>
+on the command line.</para>
+
+
+<sect2 id="cg-manual.overview" xreflabel="Overview">
+<title>Overview</title>
+
+<para>First off, as for normal Valgrind use, you probably want to
+compile with debugging info (the
+<computeroutput>-g</computeroutput> flag).  But by contrast with
+normal Valgrind use, you probably <command>do</command> want to turn
+optimisation on, since you should profile your program as it will
+be normally run.</para>
+
+<para>The two steps are:</para>
+<orderedlist>
+  <listitem>
+    <para>Run your program with <computeroutput>valgrind
+    --tool=cachegrind</computeroutput> in front of the normal
+    command line invocation.  When the program finishes,
+    Cachegrind will print summary cache statistics. It also
+    collects line-by-line information in a file
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>, where
+    <computeroutput>&lt;pid&gt;</computeroutput> is the program's process
+    ID.</para>
+
+    <para>Branch prediction statistics are not collected by default.
+    To do so, add the flag
+    <computeroutput>--branch-sim=yes</computeroutput>.
+    </para>
+
+    <para>This step should be done every time you want to collect
+    information about a new program, a changed program, or about
+    the same program with different input.</para>
+  </listitem>
+
+  <listitem>
+    <para>Generate a function-by-function summary, and possibly
+    annotate source files, using the supplied
+    cg_annotate program. Source
+    files to annotate can be specified manually on
+    the command line, or "interesting" source files can be
+    annotated automatically with the
+    <computeroutput>--auto=yes</computeroutput> option.  You can
+    annotate C/C++ files or assembly language files equally
+    easily.</para>
+
+    <para>This step can be performed as many times as you like
+    for each Step 1.  You may want to do multiple annotations
+    showing different information each time.</para>
+  </listitem>
+
+</orderedlist>
+
+<para>As an optional intermediate step, you can use the supplied
+cg_merge program to sum together the
+outputs of multiple Cachegrind runs, into a single file which you then
+use as the input for cg_annotate.</para>
+
+<para>These steps are described in detail in the following
+sections.</para>
+
+</sect2>
+
+
+<sect2 id="cache-sim" xreflabel="Cache simulation specifics">
+<title>Cache simulation specifics</title>
+
+<para>Cachegrind simulates a machine with independent
+first level instruction and data caches (I1 and D1), backed by a
+unified second level cache (L2).  This configuration is used by almost
+all modern machines.  Some old Cyrix CPUs had a unified I and D L1
+cache, but they are ancient history now.</para>
+
+<para>Specific characteristics of the simulation are as
+follows:</para>
+
+<itemizedlist>
+
+  <listitem>
+    <para>Write-allocate: when a write miss occurs, the block
+    written to is brought into the D1 cache.  Most modern caches
+    have this property.</para>
+  </listitem>
+
+  <listitem>
+    <para>Bit-selection hash function: the set of line(s) in the cache
+    to which a memory block maps is chosen by the middle bits
+    M--(M+N-1) of the byte address, where:</para>
+    <itemizedlist>
+      <listitem>
+        <para>line size = 2^M bytes</para>
+      </listitem>
+      <listitem>
+        <para>(cache size / line size / associativity) = 2^N sets</para>
+      </listitem>
+    </itemizedlist> 
+  </listitem>
+
+  <listitem>
+    <para>Inclusive L2 cache: the L2 cache typically replicates all
+    the entries of the L1 caches, because fetching into L1 involves
+    fetching into L2 first (this does not guarantee strict inclusiveness,
+    as lines evicted from L2 still could reside in L1).  This is
+    standard on Pentium chips, but AMD Opterons, Athlons and Durons
+    use an exclusive L2 cache that only holds
+    blocks evicted from L1.  Ditto most modern VIA CPUs.</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>The cache configuration simulated (cache size,
+associativity and line size) is determined automagically using
+the CPUID instruction.  If you have an old machine that (a)
+doesn't support the CPUID instruction, or (b) supports it in an
+early incarnation that doesn't give any cache information, then
+Cachegrind will fall back to using a default configuration (that
+of a model 3/4 Athlon).  Cachegrind will tell you if this
+happens.  You can manually specify one, two or all three levels
+(I1/D1/L2) of the cache from the command line using the
+<computeroutput>--I1</computeroutput>,
+<computeroutput>--D1</computeroutput> and
+<computeroutput>--L2</computeroutput> options.
+For cache parameters to be valid for simulation, the number
+of sets (with associativity being the number of cache lines in
+each set) has to be a power of two.</para>
+
+<para>On PowerPC platforms
+Cachegrind cannot automatically 
+determine the cache configuration, so you will 
+need to specify it with the
+<computeroutput>--I1</computeroutput>,
+<computeroutput>--D1</computeroutput> and
+<computeroutput>--L2</computeroutput> options.</para>
+
+
+<para>Other noteworthy behaviour:</para>
+
+<itemizedlist>
+  <listitem>
+    <para>References that straddle two cache lines are treated as
+    follows:</para>
+    <itemizedlist>
+      <listitem>
+        <para>If both blocks hit --&gt; counted as one hit</para>
+      </listitem>
+      <listitem>
+        <para>If one block hits, the other misses --&gt; counted
+        as one miss.</para>
+      </listitem>
+      <listitem>
+        <para>If both blocks miss --&gt; counted as one miss (not
+        two)</para>
+      </listitem>
+    </itemizedlist>
+  </listitem>
+
+  <listitem>
+    <para>Instructions that modify a memory location
+    (eg. <computeroutput>inc</computeroutput> and
+    <computeroutput>dec</computeroutput>) are counted as doing
+    just a read, ie. a single data reference.  This may seem
+    strange, but since the write can never cause a miss (the read
+    guarantees the block is in the cache) it's not very
+    interesting.</para>
+
+    <para>Thus it measures not the number of times the data cache
+    is accessed, but the number of times a data cache miss could
+    occur.</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>If you are interested in simulating a cache with different
+properties, it is not particularly hard to write your own cache
+simulator, or to modify the existing ones in
+<computeroutput>cg_sim.c</computeroutput>. We'd be
+interested to hear from anyone who does.</para>
+
+</sect2>
+
+
+<sect2 id="branch-sim" xreflabel="Branch simulation specifics">
+<title>Branch simulation specifics</title>
+
+<para>Cachegrind simulates branch predictors intended to be
+typical of mainstream desktop/server processors of around 2004.</para>
+
+<para>Conditional branches are predicted using an array of 16384 2-bit
+saturating counters.  The array index used for a branch instruction is
+computed partly from the low-order bits of the branch instruction's
+address and partly using the taken/not-taken behaviour of the last few
+conditional branches.  As a result the predictions for any specific
+branch depend both on its own history and the behaviour of previous
+branches.  This is a standard technique for improving prediction
+accuracy.</para>
+
+<para>For indirect branches (that is, jumps to unknown destinations)
+Cachegrind uses a simple branch target address predictor.  Targets are
+predicted using an array of 512 entries indexed by the low order 9
+bits of the branch instruction's address.  Each branch is predicted to
+jump to the same address it did last time.  Any other behaviour causes
+a mispredict.</para>
+
+<para>More recent processors have better branch predictors, in
+particular better indirect branch predictors.  Cachegrind's predictor
+design is deliberately conservative so as to be representative of the
+large installed base of processors which pre-date widespread
+deployment of more sophisticated indirect branch predictors.  In
+particular, late model Pentium 4s (Prescott), Pentium M, Core and Core
+2 have more sophisticated indirect branch predictors than modelled by
+Cachegrind.  </para>
+
+<para>Cachegrind does not simulate a return stack predictor.  It
+assumes that processors perfectly predict function return addresses,
+an assumption which is probably close to being true.</para>
+
+<para>See Hennessy and Patterson's classic text "Computer
+Architecture: A Quantitative Approach", 4th edition (2007), Section
+2.3 (pages 80-89) for background on modern branch predictors.</para>
+
+</sect2>
+
+
+</sect1>
+
+
+
+<sect1 id="cg-manual.profile" xreflabel="Profiling programs">
+<title>Profiling programs</title>
+
+<para>To gather cache profiling information about the program
+<computeroutput>ls -l</computeroutput>, invoke Cachegrind like
+this:</para>
+
+<programlisting><![CDATA[
+valgrind --tool=cachegrind ls -l]]></programlisting>
+
+<para>The program will execute (slowly).  Upon completion,
+summary statistics that look like this will be printed:</para>
+
+<programlisting><![CDATA[
+==31751== I   refs:      27,742,716
+==31751== I1  misses:           276
+==31751== L2  misses:           275
+==31751== I1  miss rate:        0.0%
+==31751== L2i miss rate:        0.0%
+==31751== 
+==31751== D   refs:      15,430,290  (10,955,517 rd + 4,474,773 wr)
+==31751== D1  misses:        41,185  (    21,905 rd +    19,280 wr)
+==31751== L2  misses:        23,085  (     3,987 rd +    19,098 wr)
+==31751== D1  miss rate:        0.2% (       0.1%   +       0.4%)
+==31751== L2d miss rate:        0.1% (       0.0%   +       0.4%)
+==31751== 
+==31751== L2 misses:         23,360  (     4,262 rd +    19,098 wr)
+==31751== L2 miss rate:         0.0% (       0.0%   +       0.4%)]]></programlisting>
+
+<para>Cache accesses for instruction fetches are summarised
+first, giving the number of fetches made (this is the number of
+instructions executed, which can be useful to know in its own
+right), the number of I1 misses, and the number of L2 instruction
+(<computeroutput>L2i</computeroutput>) misses.</para>
+
+<para>Cache accesses for data follow. The information is similar
+to that of the instruction fetches, except that the values are
+also shown split between reads and writes (note each row's
+<computeroutput>rd</computeroutput> and
+<computeroutput>wr</computeroutput> values add up to the row's
+total).</para>
+
+<para>Combined instruction and data figures for the L2 cache
+follow that.</para>
+
+
+
+<sect2 id="cg-manual.outputfile" xreflabel="Output file">
+<title>Output file</title>
+
+<para>As well as printing summary information, Cachegrind also
+writes line-by-line cache profiling information to a user-specified
+file.  By default this file is named
+<computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>.  This file
+is human-readable, but is intended to be interpreted by the accompanying
+program cg_annotate, described in the next section.</para>
+
+<para>Things to note about the
+<computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>
+file:</para>
+
+<itemizedlist>
+  <listitem>
+    <para>It is written every time Cachegrind is run, and will
+    overwrite any existing
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>
+    in the current directory (but that won't happen very often
+    because it takes some time for process ids to be
+    recycled).</para>
+  </listitem>
+  <listitem>
+    <para>To use an output file name other than the default
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>,
+    use the <computeroutput>--cachegrind-out-file</computeroutput>
+    switch.</para>
+  </listitem>
+  <listitem>
+    <para>It can be big: <computeroutput>ls -l</computeroutput>
+    generates a file of about 350KB.  Browsing a few files and
+    web pages with a Konqueror built with full debugging
+    information generates a file of around 15 MB.</para>
+  </listitem>
+</itemizedlist>
+
+<para>The default <computeroutput>.&lt;pid&gt;</computeroutput> suffix
+on the output file name serves two purposes.  Firstly, it means you 
+don't have to rename old log files that you don't want to overwrite.  
+Secondly, and more importantly, it allows correct profiling with the
+<computeroutput>--trace-children=yes</computeroutput> option of
+programs that spawn child processes.</para>
+
+</sect2>
+
+
+
+<sect2 id="cg-manual.cgopts" xreflabel="Cachegrind options">
+<title>Cachegrind options</title>
+
+<!-- start of xi:include in the manpage -->
+<para id="cg.opts.para">Using command line options, you can 
+manually specify the I1/D1/L2 cache
+configuration to simulate.  For each cache, you can specify the
+size, associativity and line size.  The size and line size
+are measured in bytes.  The three items
+must be comma-separated, but with no spaces, eg:
+<literallayout>    valgrind --tool=cachegrind --I1=65536,2,64</literallayout>
+
+You can specify one, two or three of the I1/D1/L2 caches.  Any level not
+manually specified will be simulated using the configuration found in
+the normal way (via the CPUID instruction for automagic cache
+configuration, or failing that, via defaults).</para>
+
+<para>Cache-simulation specific options are:</para>
+
+<variablelist id="cg.opts.list">
+
+  <varlistentry id="opt.I1" xreflabel="--I1">
+    <term>
+      <option><![CDATA[--I1=<size>,<associativity>,<line size> ]]></option>
+    </term>
+    <listitem>
+      <para>Specify the size, associativity and line size of the level 1
+      instruction cache.  </para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.D1" xreflabel="--D1">
+    <term>
+      <option><![CDATA[--D1=<size>,<associativity>,<line size> ]]></option>
+    </term>
+    <listitem>
+      <para>Specify the size, associativity and line size of the level 1
+      data cache.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.L2" xreflabel="--L2">
+    <term>
+      <option><![CDATA[--L2=<size>,<associativity>,<line size> ]]></option>
+    </term>
+    <listitem>
+      <para>Specify the size, associativity and line size of the level 2
+      cache.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.cachegrind-out-file" xreflabel="--cachegrind-out-file">
+    <term>
+      <option><![CDATA[--cachegrind-out-file=<file> ]]></option>
+    </term>
+    <listitem>
+      <para>Write the profile data to 
+            <computeroutput>file</computeroutput> rather than to the default
+            output file,
+            <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>.  The
+            <option>%p</option> and <option>%q</option> format specifiers
+            can be used to embed the process ID and/or the contents of an
+            environment variable in the name, as is the case for the core
+            option <option>--log-file</option>.  See <link
+            linkend="manual-core.basicopts">here</link> for details.
+      </para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.cache-sim" xreflabel="--cache-sim">
+    <term>
+      <option><![CDATA[--cache-sim=no|yes [yes] ]]></option>
+    </term>
+    <listitem>
+      <para>Enables or disables collection of cache access and miss
+            counts.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.branch-sim" xreflabel="--branch-sim">
+    <term>
+      <option><![CDATA[--branch-sim=no|yes [no] ]]></option>
+    </term>
+    <listitem>
+      <para>Enables or disables collection of branch instruction and
+            misprediction counts.  By default this is disabled as it
+            slows Cachegrind down by approximately 25%.  Note that you
+            cannot specify <computeroutput>--cache-sim=no</computeroutput>
+            and <computeroutput>--branch-sim=no</computeroutput>
+            together, as that would leave Cachegrind with no
+            information to collect.</para>
+    </listitem>
+  </varlistentry>
+
+</variablelist>
+<!-- end of xi:include in the manpage -->
+
+</sect2>
+
+
+  
+<sect2 id="cg-manual.annotate" xreflabel="Annotating C/C++ programs">
+<title>Annotating C/C++ programs</title>
+
+<para>Before using cg_annotate,
+it is worth widening your window to be at least 120 characters
+wide if possible, as the output lines can be quite long.</para>
+
+<para>To get a function-by-function summary, run <computeroutput>cg_annotate
+&lt;filename&gt;</computeroutput> on a Cachegrind output file.</para>
+
+<para>The output looks like this:</para>
+
+<programlisting><![CDATA[
+--------------------------------------------------------------------------------
+I1 cache:              65536 B, 64 B, 2-way associative
+D1 cache:              65536 B, 64 B, 2-way associative
+L2 cache:              262144 B, 64 B, 8-way associative
+Command:               concord vg_to_ucode.c
+Events recorded:       Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
+Events shown:          Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
+Event sort order:      Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
+Threshold:             99%
+Chosen for annotation:
+Auto-annotation:       off
+
+--------------------------------------------------------------------------------
+Ir         I1mr I2mr Dr         D1mr   D2mr  Dw        D1mw   D2mw
+--------------------------------------------------------------------------------
+27,742,716  276  275 10,955,517 21,905 3,987 4,474,773 19,280 19,098  PROGRAM TOTALS
+
+--------------------------------------------------------------------------------
+Ir        I1mr I2mr Dr        D1mr  D2mr  Dw        D1mw   D2mw    file:function
+--------------------------------------------------------------------------------
+8,821,482    5    5 2,242,702 1,621    73 1,794,230      0      0  getc.c:_IO_getc
+5,222,023    4    4 2,276,334    16    12   875,959      1      1  concord.c:get_word
+2,649,248    2    2 1,344,810 7,326 1,385         .      .      .  vg_main.c:strcmp
+2,521,927    2    2   591,215     0     0   179,398      0      0  concord.c:hash
+2,242,740    2    2 1,046,612   568    22   448,548      0      0  ctype.c:tolower
+1,496,937    4    4   630,874 9,000 1,400   279,388      0      0  concord.c:insert
+  897,991   51   51   897,831    95    30        62      1      1  ???:???
+  598,068    1    1   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__flockfile
+  598,068    0    0   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__funlockfile
+  598,024    4    4   213,580    35    16   149,506      0      0  vg_clientmalloc.c:malloc
+  446,587    1    1   215,973 2,167   430   129,948 14,057 13,957  concord.c:add_existing
+  341,760    2    2   128,160     0     0   128,160      0      0  vg_clientmalloc.c:vg_trap_here_WRAPPER
+  320,782    4    4   150,711   276     0    56,027     53     53  concord.c:init_hash_table
+  298,998    1    1   106,785     0     0    64,071      1      1  concord.c:create
+  149,518    0    0   149,516     0     0         1      0      0  ???:tolower@@GLIBC_2.0
+  149,518    0    0   149,516     0     0         1      0      0  ???:fgetc@@GLIBC_2.0
+   95,983    4    4    38,031     0     0    34,409  3,152  3,150  concord.c:new_word_node
+   85,440    0    0    42,720     0     0    21,360      0      0  vg_clientmalloc.c:vg_bogus_epilogue]]></programlisting>
+
+
+<para>First up is a summary of the annotation options:</para>
+                    
+<itemizedlist>
+
+  <listitem>
+    <para>I1 cache, D1 cache, L2 cache: cache configuration.  So
+    you know the configuration with which these results were
+    obtained.</para>
+  </listitem>
+
+  <listitem>
+    <para>Command: the command line invocation of the program
+      under examination.</para>
+  </listitem>
+
+  <listitem>
+   <para>Events recorded: event abbreviations are:</para>
+   <itemizedlist>
+     <listitem>
+       <para><computeroutput>Ir</computeroutput>: I cache reads
+       (ie. instructions executed)</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>I1mr</computeroutput>: I1 cache read
+       misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>I2mr</computeroutput>: L2 cache
+       instruction read misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Dr</computeroutput>: D cache reads
+       (ie. memory reads)</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>D1mr</computeroutput>: D1 cache read
+       misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>D2mr</computeroutput>: L2 cache data
+       read misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Dw</computeroutput>: D cache writes
+       (ie. memory writes)</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>D1mw</computeroutput>: D1 cache write
+       misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>D2mw</computeroutput>: L2 cache data
+       write misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Bc</computeroutput>: Conditional branches
+       executed</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Bcm</computeroutput>: Conditional branches
+       mispredicted</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Bi</computeroutput>: Indirect branches
+       executed</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Bim</computeroutput>: Indirect branches
+       mispredicted</para>
+     </listitem>
+   </itemizedlist>
+
+   <para>Note that D1 total misses is given by
+   <computeroutput>D1mr</computeroutput> +
+   <computeroutput>D1mw</computeroutput>, and that L2 total
+   misses is given by <computeroutput>I2mr</computeroutput> +
+   <computeroutput>D2mr</computeroutput> +
+   <computeroutput>D2mw</computeroutput>.</para>
+ </listitem>
+
+ <listitem>
+   <para>Events shown: the events shown, which is a subset of the events
+   gathered.  This can be adjusted with the
+   <computeroutput>--show</computeroutput> option.</para>
+  </listitem>
+
+  <listitem>
+    <para>Event sort order: the sort order in which functions are
+    shown.  For example, in this case the functions are sorted
+    from highest <computeroutput>Ir</computeroutput> counts to
+    lowest.  If two functions have identical
+    <computeroutput>Ir</computeroutput> counts, they will then be
+    sorted by <computeroutput>I1mr</computeroutput> counts, and
+    so on.  This order can be adjusted with the
+    <computeroutput>--sort</computeroutput> option.</para>
+
+    <para>Note that this dictates the order the functions appear.
+    It is <command>not</command> the order in which the columns
+    appear; that is dictated by the "events shown" line (and can
+    be changed with the <computeroutput>--show</computeroutput>
+    option).</para>
+  </listitem>
+
+  <listitem>
+    <para>Threshold: cg_annotate
+    by default omits functions that cause very low counts
+    to avoid drowning you in information.  In this case,
+    cg_annotate shows summaries for the functions that account for
+    99% of the <computeroutput>Ir</computeroutput> counts;
+    <computeroutput>Ir</computeroutput> is chosen as the
+    threshold event since it is the primary sort event.  The
+    threshold can be adjusted with the
+    <computeroutput>--threshold</computeroutput>
+    option.</para>
+  </listitem>
+
+  <listitem>
+    <para>Chosen for annotation: names of files specified
+    manually for annotation; in this case none.</para>
+  </listitem>
+
+  <listitem>
+    <para>Auto-annotation: whether auto-annotation was requested
+    via the <computeroutput>--auto=yes</computeroutput>
+    option. In this case no.</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>Then follow summary statistics for the whole
+program. These are similar to the summary provided when running
+<computeroutput>valgrind --tool=cachegrind</computeroutput>.</para>
+  
+<para>Then follow function-by-function statistics. Each function
+is identified by a
+<computeroutput>file_name:function_name</computeroutput> pair. If
+a column contains only a dot it means the function never performs
+that event (eg. the third row shows that
+<computeroutput>strcmp()</computeroutput> contains no
+instructions that write to memory). The name
+<computeroutput>???</computeroutput> is used if the file name
+and/or function name could not be determined from debugging
+information. If most of the entries have the form
+<computeroutput>???:???</computeroutput> the program probably
+wasn't compiled with <computeroutput>-g</computeroutput>.  If any
+code was invalidated (either due to self-modifying code or
+unloading of shared objects) its counts are aggregated into a
+single cost centre written as
+<computeroutput>(discarded):(discarded)</computeroutput>.</para>
+
+<para>It is worth noting that functions will come both from
+the profiled program (eg. <filename>concord.c</filename>)
+and from libraries (eg. <filename>getc.c</filename>).</para>
+
+<para>There are two ways to annotate source files -- by choosing
+them manually, or with the
+<computeroutput>--auto=yes</computeroutput> option. To do it
+manually, just specify the filenames as additional arguments to
+cg_annotate. For example, the
+output from running <filename>cg_annotate &lt;filename&gt;
+concord.c</filename> for our example produces the same output as above
+followed by an annotated version of <filename>concord.c</filename>, a
+section of which looks like:</para>
+
+<programlisting><![CDATA[
+--------------------------------------------------------------------------------
+-- User-annotated source: concord.c
+--------------------------------------------------------------------------------
+Ir        I1mr I2mr Dr      D1mr  D2mr  Dw      D1mw   D2mw
+
+[snip]
+
+        .    .    .       .     .     .       .      .      .  void init_hash_table(char *file_name, Word_Node *table[])
+        3    1    1       .     .     .       1      0      0  {
+        .    .    .       .     .     .       .      .      .      FILE *file_ptr;
+        .    .    .       .     .     .       .      .      .      Word_Info *data;
+        1    0    0       .     .     .       1      1      1      int line = 1, i;
+        .    .    .       .     .     .       .      .      .
+        5    0    0       .     .     .       3      0      0      data = (Word_Info *) create(sizeof(Word_Info));
+        .    .    .       .     .     .       .      .      .
+    4,991    0    0   1,995     0     0     998      0      0      for (i = 0; i < TABLE_SIZE; i++)
+    3,988    1    1   1,994     0     0     997     53     52          table[i] = NULL;
+        .    .    .       .     .     .       .      .      .
+        .    .    .       .     .     .       .      .      .      /* Open file, check it. */
+        6    0    0       1     0     0       4      0      0      file_ptr = fopen(file_name, "r");
+        2    0    0       1     0     0       .      .      .      if (!(file_ptr)) {
+        .    .    .       .     .     .       .      .      .          fprintf(stderr, "Couldn't open '%s'.\n", file_name);
+        1    1    1       .     .     .       .      .      .          exit(EXIT_FAILURE);
+        .    .    .       .     .     .       .      .      .      }
+        .    .    .       .     .     .       .      .      .
+  165,062    1    1  73,360     0     0  91,700      0      0      while ((line = get_word(data, line, file_ptr)) != EOF)
+  146,712    0    0  73,356     0     0  73,356      0      0          insert(data->word, data->line, table);
+        .    .    .       .     .     .       .      .      .
+        4    0    0       1     0     0       2      0      0      free(data);
+        4    0    0       1     0     0       2      0      0      fclose(file_ptr);
+        3    0    0       2     0     0       .      .      .  }]]></programlisting>
+
+<para>(Although column widths are automatically minimised, a wide
+terminal is clearly useful.)</para>
+  
+<para>Each source file is clearly marked
+(<computeroutput>User-annotated source</computeroutput>) as
+having been chosen manually for annotation.  If the file was
+found in one of the directories specified with the
+<computeroutput>-I / --include</computeroutput> option, the directory
+and file are both given.</para>
+
+<para>Each line is annotated with its event counts.  Events not
+applicable for a line are represented by a dot.  This is useful
+for distinguishing between an event which cannot happen, and one
+which can but did not.</para>
+
+<para>Sometimes only a small section of a source file is
+executed.  To minimise uninteresting output, Cachegrind only shows
+annotated lines and lines within a small distance of annotated
+lines.  Gaps are marked with the line numbers so you know which
+part of a file the shown code comes from, eg:</para>
+
+<programlisting><![CDATA[
+(figures and code for line 704)
+-- line 704 ----------------------------------------
+-- line 878 ----------------------------------------
+(figures and code for line 878)]]></programlisting>
+
+<para>The amount of context to show around annotated lines is
+controlled by the <computeroutput>--context</computeroutput>
+option.</para>
+
+<para>To get automatic annotation, run
+<computeroutput>cg_annotate &lt;filename&gt; --auto=yes</computeroutput>.
+cg_annotate will automatically annotate every source file it can
+find that is mentioned in the function-by-function summary.
+Therefore, the files chosen for auto-annotation are affected by
+the <computeroutput>--sort</computeroutput> and
+<computeroutput>--threshold</computeroutput> options.  Each
+source file is clearly marked (<computeroutput>Auto-annotated
+source</computeroutput>) as being chosen automatically.  Any
+files that could not be found are mentioned at the end of the
+output, eg:</para>
+
+<programlisting><![CDATA[
+------------------------------------------------------------------
+The following files chosen for auto-annotation could not be found:
+------------------------------------------------------------------
+  getc.c
+  ctype.c
+  ../sysdeps/generic/lockfile.c]]></programlisting>
+
+<para>This is quite common for library files, since libraries are
+usually compiled with debugging information, but the source files
+are often not present on a system.  If a file is chosen for
+annotation <command>both</command> manually and automatically, it
+is marked as <computeroutput>User-annotated
+source</computeroutput>. Use the <computeroutput>-I /
+--include</computeroutput> option to tell Valgrind where to look
+for source files if the filenames found from the debugging
+information aren't specific enough.</para>
+
+<para>Beware that cg_annotate can take some time to digest large
+<computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> files,
+e.g. 30 seconds or more.  Also beware that auto-annotation can
+produce a lot of output if your program is large!</para>
+
+</sect2>
+
+
+<sect2 id="cg-manual.assembler" xreflabel="Annotating assembler programs">
+<title>Annotating assembly code programs</title>
+
+<para>Valgrind can annotate assembly code programs too, or annotate
+the assembly code generated for your C program.  Sometimes this is
+useful for understanding what is really happening when an
+interesting line of C code is translated into multiple
+instructions.</para>
+
+<para>To do this, you just need to assemble your
+<computeroutput>.s</computeroutput> files with assembly-level debug
+information.  You can use <computeroutput>gcc
+-S</computeroutput> to compile C/C++ programs to assembly code, and then
+<computeroutput>gcc -g</computeroutput> on the assembly code files to
+achieve this.  You can then profile and annotate the assembly code source
+files in the same way as C/C++ source files.</para>
+
+</sect2>
+
+<sect2 id="ms-manual.forkingprograms" xreflabel="Forking Programs">
+<title>Forking Programs</title>
+<para>If your program forks, the child will inherit all the profiling data that
+has been gathered for the parent.</para>
+
+<para>If the output file format string (controlled by
+<option>--cachegrind-out-file</option>) does not contain <option>%p</option>,
+then the outputs from the parent and child will be intermingled in a single
+output file, which will almost certainly make it unreadable by
+cg_annotate.</para>
+</sect2>
+
+
+</sect1>
+
+
+<sect1 id="cg-manual.annopts" xreflabel="cg_annotate options">
+<title>cg_annotate options</title>
+
+<itemizedlist>
+
+  <listitem>
+    <para><computeroutput>-h, --help</computeroutput></para>
+    <para><computeroutput>-v, --version</computeroutput></para>
+    <para>Help and version, as usual.</para>
+  </listitem>
+
+  <listitem id="sort">
+    <para><computeroutput>--sort=A,B,C</computeroutput> [default:
+    order in
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>]</para>
+    <para>Specifies the events upon which the sorting of the
+    function-by-function entries will be based.  Useful if you
+    want to concentrate on eg. I cache misses
+    (<computeroutput>--sort=I1mr,I2mr</computeroutput>), or D
+    cache misses
+    (<computeroutput>--sort=D1mr,D2mr</computeroutput>), or L2
+    misses
+    (<computeroutput>--sort=D2mr,I2mr</computeroutput>).</para>
+  </listitem>
+
+  <listitem id="show">
+    <para><computeroutput>--show=A,B,C</computeroutput> [default:
+    all, using order in
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>]</para>
+    <para>Specifies which events to show (and the column
+    order). Default is to use all present in the
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> file (and
+    use the order in the file).</para>
+  </listitem>
+
+  <listitem id="threshold">
+    <para><computeroutput>--threshold=X</computeroutput>
+    [default: 99%]</para>
+    <para>Sets the threshold for the function-by-function
+    summary.  Functions are shown that account for more than X%
+    of the primary sort event.  If auto-annotating, also affects
+    which files are annotated.</para>
+      
+    <para>Note: thresholds can be set for more than one of the
+    events by appending any events for the
+    <computeroutput>--sort</computeroutput> option with a colon
+    and a number (no spaces, though).  E.g. if you want to see
+    the functions that cover 99% of L2 read misses and 99% of L2
+    write misses, use this option:</para>
+    <para><computeroutput>--sort=D2mr:99,D2mw:99</computeroutput></para>
+  </listitem>
+
+  <listitem id="auto">
+    <para><computeroutput>--auto=no</computeroutput> [default]</para>
+    <para><computeroutput>--auto=yes</computeroutput></para>
+    <para>When enabled, automatically annotates every file that
+    is mentioned in the function-by-function summary that can be
+    found.  Also gives a list of those that couldn't be found.</para>
+  </listitem>
+
+  <listitem id="context">
+    <para><computeroutput>--context=N</computeroutput> [default:
+    8]</para>
+    <para>Print N lines of context before and after each
+    annotated line.  Avoids printing large sections of source
+    files that were not executed.  Use a large number
+    (eg. 10,000) to show all source lines.</para>
+  </listitem>
+
+  <listitem id="include">
+    <para><computeroutput>-I&lt;dir&gt;,
+      --include=&lt;dir&gt;</computeroutput> [default: empty
+      string]</para>
+    <para>Adds a directory to the list in which to search for
+    files.  Multiple -I/--include options can be given to add
+    multiple directories.</para>
+  </listitem>
+
+</itemizedlist>
+  
+
+
+<sect2 id="cg-manual.annopts.warnings" xreflabel="Warnings">
+<title>Warnings</title>
+
+<para>There are a couple of situations in which
+cg_annotate issues warnings.</para>
+
+<itemizedlist>
+  <listitem>
+    <para>If a source file is more recent than the
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> file.
+    This is because the information in
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> is only
+    recorded with line numbers, so if the line numbers change at
+    all in the source (eg.  lines added, deleted, swapped), any
+    annotations will be incorrect.</para>
+  </listitem>
+  <listitem>
+    <para>If information is recorded about line numbers past the
+    end of a file.  This can be caused by the above problem,
+    ie. shortening the source file while using an old
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> file.  If
+    this happens, the figures for the bogus lines are printed
+    anyway (clearly marked as bogus) in case they are
+    important.</para>
+  </listitem>
+</itemizedlist>
+
+</sect2>
+
+
+
+<sect2 id="cg-manual.annopts.things-to-watch-out-for"
+       xreflabel="Things to watch out for">
+<title>Things to watch out for</title>
+
+<para>Some odd things that can occur during annotation:</para>
+
+<itemizedlist>
+  <listitem>
+    <para>If annotating at the assembler level, you might see
+    something like this:</para>
+<programlisting><![CDATA[
+      1    0    0  .    .    .  .    .    .          leal -12(%ebp),%eax
+      1    0    0  .    .    .  1    0    0          movl %eax,84(%ebx)
+      2    0    0  0    0    0  1    0    0          movl $1,-20(%ebp)
+      .    .    .  .    .    .  .    .    .          .align 4,0x90
+      1    0    0  .    .    .  .    .    .          movl $.LnrB,%eax
+      1    0    0  .    .    .  1    0    0          movl %eax,-16(%ebp)]]></programlisting>
+
+    <para>How can the third instruction be executed twice when
+    the others are executed only once?  As it turns out, it
+    isn't.  Here's a dump of the executable, using
+    <computeroutput>objdump -d</computeroutput>:</para>
+<programlisting><![CDATA[
+      8048f25:       8d 45 f4                lea    0xfffffff4(%ebp),%eax
+      8048f28:       89 43 54                mov    %eax,0x54(%ebx)
+      8048f2b:       c7 45 ec 01 00 00 00    movl   $0x1,0xffffffec(%ebp)
+      8048f32:       89 f6                   mov    %esi,%esi
+      8048f34:       b8 08 8b 07 08          mov    $0x8078b08,%eax
+      8048f39:       89 45 f0                mov    %eax,0xfffffff0(%ebp)]]></programlisting>
+
+    <para>Notice the extra <computeroutput>mov
+    %esi,%esi</computeroutput> instruction.  Where did this come
+    from?  The GNU assembler inserted it to serve as the two
+    bytes of padding needed to align the <computeroutput>movl
+    $.LnrB,%eax</computeroutput> instruction on a four-byte
+    boundary, but pretended it didn't exist when adding debug
+    information.  Thus when Valgrind reads the debug info it
+    thinks that the <computeroutput>movl
+    $0x1,0xffffffec(%ebp)</computeroutput> instruction covers the
+    address range 0x8048f2b--0x8048f33 by itself, and attributes
+    the counts for the <computeroutput>mov
+    %esi,%esi</computeroutput> to it.</para>
+  </listitem>
+
+  <listitem>
+    <para>Inlined functions can cause strange results in the
+    function-by-function summary.  If a function
+    <computeroutput>inline_me()</computeroutput> is defined in
+    <filename>foo.h</filename> and inlined in the functions
+    <computeroutput>f1()</computeroutput>,
+    <computeroutput>f2()</computeroutput> and
+    <computeroutput>f3()</computeroutput> in
+    <filename>bar.c</filename>, there will not be a
+    <computeroutput>foo.h:inline_me()</computeroutput> function
+    entry.  Instead, there will be separate function entries for
+    each inlining site, ie.
+    <computeroutput>foo.h:f1()</computeroutput>,
+    <computeroutput>foo.h:f2()</computeroutput> and
+    <computeroutput>foo.h:f3()</computeroutput>.  To find the
+    total counts for
+    <computeroutput>foo.h:inline_me()</computeroutput>, add up
+    the counts from each entry.</para>
+
+    <para>The reason for this is that although the debug info
+    output by gcc indicates the switch from
+    <filename>bar.c</filename> to <filename>foo.h</filename>, it
+    doesn't indicate the name of the function in
+    <filename>foo.h</filename>, so Valgrind keeps using the old
+    one.</para>
+  </listitem>
+
+  <listitem>
+    <para>Sometimes, the same filename might be represented with
+    a relative name and with an absolute name in different parts
+    of the debug info, eg:
+    <filename>/home/user/proj/proj.h</filename> and
+    <filename>../proj.h</filename>.  In this case, if you use
+    auto-annotation, the file will be annotated twice with the
+    counts split between the two.</para>
+  </listitem>
+
+  <listitem>
+    <para>Files with more than 65,535 lines cause difficulties
+    for the Stabs-format debug info reader.  This is because the line
+    number in the <computeroutput>struct nlist</computeroutput>
+    defined in <filename>a.out.h</filename> under Linux is only a
+    16-bit value.  Valgrind can handle some files with more than
+    65,535 lines correctly by making some guesses to identify
+    line number overflows.  But some cases are beyond it, in
+    which case you'll get a warning message explaining that
+    annotations for the file might be incorrect.</para>
+    
+    <para>If you are using gcc 3.1 or later, this is most likely
+    irrelevant, since gcc switched to using the more modern DWARF2 
+    format by default at version 3.1.  DWARF2 does not have any such
+    limitations on line numbers.</para>
+  </listitem>
+
+  <listitem>
+    <para>If you compile some files with
+    <computeroutput>-g</computeroutput> and some without, some
+    events that take place in a file without debug info could be
+    attributed to the last line of a file with debug info
+    (whichever one gets placed before the non-debug-info file in
+    the executable).</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>This list looks long, but these cases should be fairly
+rare.</para>
+
+</sect2>
+
+
+
+<sect2 id="cg-manual.annopts.accuracy" xreflabel="Accuracy">
+<title>Accuracy</title>
+
+<para>Valgrind's cache profiling has a number of
+shortcomings:</para>
+
+<itemizedlist>
+  <listitem>
+    <para>It doesn't account for kernel activity -- the effect of
+    system calls on the cache contents is ignored.</para>
+  </listitem>
+
+  <listitem>
+    <para>It doesn't account for other process activity.
+    This is probably desirable when considering a single
+    program.</para>
+  </listitem>
+
+  <listitem>
+    <para>It doesn't account for virtual-to-physical address
+    mappings.  Hence the simulation is not a true
+    representation of what's happening in the
+    cache.  Most caches are physically indexed, but Cachegrind
+    simulates caches using virtual addresses.</para>
+  </listitem>
+
+  <listitem>
+    <para>It doesn't account for cache misses not visible at the
+    instruction level, eg. those arising from TLB misses, or
+    speculative execution.</para>
+  </listitem>
+
+  <listitem>
+    <para>Valgrind will schedule
+    threads differently from how they would be when running natively.
+    This could warp the results for threaded programs.</para>
+  </listitem>
+
+  <listitem>
+    <para>The x86/amd64 instructions <computeroutput>bts</computeroutput>,
+    <computeroutput>btr</computeroutput> and
+    <computeroutput>btc</computeroutput> will incorrectly be
+    counted as doing a data read if both the arguments are
+    registers, eg:</para>
+<programlisting><![CDATA[
+    btsl %eax, %edx]]></programlisting>
+
+    <para>This should only happen rarely.</para>
+  </listitem>
+
+  <listitem>
+    <para>x86/amd64 FPU instructions with data sizes of 28 and 108 bytes
+    (e.g.  <computeroutput>fsave</computeroutput>) are treated as
+    though they only access 16 bytes.  These instructions seem to
+    be rare so hopefully this won't affect accuracy much.</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>Another thing worth noting is that results are very sensitive.
+Changing the size of the executable being profiled, or the sizes
+of any of the shared libraries it uses, or even the length of their
+file names, can perturb the results.  Variations will be small, but
+don't expect perfectly repeatable results if your program changes at
+all.</para>
+
+<para>More recent GNU/Linux distributions do address space
+randomisation, in which identical runs of the same program have their
+shared libraries loaded at different locations, as a security measure.
+This also perturbs the results.</para>
+
+<para>While these factors mean you shouldn't trust the results to
+be super-accurate, hopefully they should be close enough to be
+useful.</para>
+
+</sect2>
+
+</sect1>
+
+
+
+<sect1 id="cg-manual.cg_merge" xreflabel="cg_merge">
+<title>Merging profiles with cg_merge</title>
+
+<para>
+cg_merge is a simple program which
+reads multiple profile files, as created by cachegrind, merges them
+together, and writes the results into another file in the same format.
+You can then examine the merged results using
+<computeroutput>cg_annotate &lt;filename&gt;</computeroutput>, as
+described above.  The merging functionality might be useful if you
+want to aggregate costs over multiple runs of the same program, or
+from a single parallel run with multiple instances of the same
+program.</para>
+
+<para>
+cg_merge is invoked as follows:
+</para>
+
+<programlisting><![CDATA[
+cg_merge -o outputfile file1 file2 file3 ...]]></programlisting>
+
+<para>
+It reads and checks <computeroutput>file1</computeroutput>, then reads
+and checks <computeroutput>file2</computeroutput> and merges it into
+the running totals, then the same with
+<computeroutput>file3</computeroutput>, etc.  The final results are
+written to <computeroutput>outputfile</computeroutput>, or to standard
+out if no output file is specified.</para>
+
+<para>
+Costs are summed on a per-function, per-line and per-instruction
+basis.  Because of this, the order in which the input files are given does not
+matter, although you should take care to only mention each file once,
+since any file mentioned twice will be added in twice.</para>
+
+<para>
+cg_merge does not attempt to check
+that the input files come from runs of the same executable.  It will
+happily merge together profile files from completely unrelated
+programs.  It does however check that the
+<computeroutput>Events:</computeroutput> lines of all the inputs are
+identical, so as to ensure that the addition of costs makes sense.
+For example, it would be nonsensical for it to add a number indicating
+D1 read references to a number from a different file indicating L2
+write misses.</para>
+
+<para>
+A number of other syntax and sanity checks are done whilst reading the
+inputs.  cg_merge will stop and
+attempt to print a helpful error message if any of the input files
+fail these checks.</para>
+
+</sect1>
+
+
+<sect1 id="cg-manual.acting-on"
+       xreflabel="Acting on Cachegrind's information">
+<title>Acting on Cachegrind's information</title>
+<para>
+So, you've managed to profile your program with Cachegrind.  Now what?
+What's the best way to actually act on the information it provides to speed
+up your program?  Here are some rules of thumb that we have found to be
+useful.</para>
+
+<para>
+First of all, the global hit/miss rate numbers are not that useful.  If you
+have multiple programs or multiple runs of a program, comparing the numbers
+might identify if any are outliers and worthy of closer investigation.
+Otherwise, they're not enough to act on.</para>
+
+<para>
+The line-by-line source code annotations are much more useful.  In our
+experience, the best place to start is by looking at the
+<computeroutput>Ir</computeroutput> numbers.  They simply measure how many
+instructions were executed for each line, and don't include any cache
+information, but they can still be very useful for identifying
+bottlenecks.</para>
+
+<para>
+After that, we have found that L2 misses are typically a much bigger source
+of slow-downs than L1 misses.  So it's worth looking for any snippets of
+code that cause a high proportion of the L2 misses.  If you find any, it's
+still not always easy to work out how to improve things.  You need to have a
+reasonable understanding of how caches work, the principles of locality, and
+your program's data access patterns.  Improving things may require
+redesigning a data structure, for example.</para>
+
+<para>
+In short, Cachegrind can tell you where some of the bottlenecks in your code
+are, but it can't tell you how to fix them.  You have to work that out for
+yourself.  But at least you have the information!
+</para>
+
+</sect1>
+
+<sect1 id="cg-manual.impl-details"
+       xreflabel="Implementation details">
+<title>Implementation details</title>
+<para>
+This section talks about details you don't need to know about in order to
+use Cachegrind, but may be of interest to some people.
+</para>
+
+<sect2 id="cg-manual.impl-details.how-cg-works"
+       xreflabel="How Cachegrind works">
+<title>How Cachegrind works</title>
+<para>The best reference for understanding how Cachegrind works is chapter 3 of
+"Dynamic Binary Analysis and Instrumentation", by Nicholas Nethercote.  It
+is available on the <ulink url="&vg-pubs;">Valgrind publications
+page</ulink>.</para>
+</sect2>
+
+<sect2 id="cg-manual.impl-details.file-format"
+       xreflabel="Cachegrind output file format">
+<title>Cachegrind output file format</title>
+<para>The file format is fairly straightforward, basically giving the
+cost centre for every line, grouped by files and
+functions.  Total counts (eg. total cache accesses, total L1
+misses) are calculated when traversing this structure rather than
+during execution, to save time; the cache simulation functions
+are called so often that even one or two extra adds can make a
+sizeable difference.</para>
+
+<para>The file format:</para>
+<programlisting><![CDATA[
+file         ::= desc_line* cmd_line events_line data_line+ summary_line
+desc_line    ::= "desc:" ws? non_nl_string
+cmd_line     ::= "cmd:" ws? cmd
+events_line  ::= "events:" ws? (event ws)+
+data_line    ::= file_line | fn_line | count_line
+file_line    ::= "fl=" filename
+fn_line      ::= "fn=" fn_name
+count_line   ::= line_num ws? (count ws)+
+summary_line ::= "summary:" ws? (count ws)+
+count        ::= num | "."]]></programlisting>
+
+<para>Where:</para>
+<itemizedlist>
+  <listitem>
+    <para><computeroutput>non_nl_string</computeroutput> is any
+    string not containing a newline.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>cmd</computeroutput> is a string holding the
+    command line of the profiled program.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>event</computeroutput> is a string containing
+    no whitespace.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>filename</computeroutput> and
+    <computeroutput>fn_name</computeroutput> are strings.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>num</computeroutput> and
+    <computeroutput>line_num</computeroutput> are decimal
+    numbers.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>ws</computeroutput> is whitespace.</para>
+  </listitem>
+</itemizedlist>
+
+<para>The contents of the "desc:" lines are printed out at the top
+of the summary.  This is a generic way of providing simulation
+specific information, eg. for giving the cache configuration for
+cache simulation.</para>
+
+<para>More than one line of info can be presented for each file/fn/line number.
+In such cases, the counts for the named events will be accumulated.</para>
+
+<para>Counts can be "." to represent zero.  This makes the files easier for
+humans to read.</para>
+
+<para>The number of counts in each
+<computeroutput>count_line</computeroutput> and the
+<computeroutput>summary_line</computeroutput> should not exceed
+the number of events in the
+<computeroutput>events_line</computeroutput>.  If the number in
+each <computeroutput>count_line</computeroutput> is less, cg_annotate
+treats those missing as though they were a "." entry.  This saves space.
+</para>
+
+<para>A <computeroutput>file_line</computeroutput> changes the
+current file name.  A <computeroutput>fn_line</computeroutput>
+changes the current function name.  A
+<computeroutput>count_line</computeroutput> contains counts that
+pertain to the current filename/fn_name.  A "fl="
+<computeroutput>file_line</computeroutput> and a
+<computeroutput>fn_line</computeroutput> must appear before any
+<computeroutput>count_line</computeroutput>s to give the context
+of the first <computeroutput>count_line</computeroutput>s.</para>
+
+<para>Each <computeroutput>file_line</computeroutput> will normally be
+immediately followed by a <computeroutput>fn_line</computeroutput>.  But it
+doesn't have to be.</para>
+
+
+</sect2>
+
+</sect1>
+</chapter>
diff --git a/cachegrind/docs/Makefile b/cachegrind/docs/Makefile
new file mode 100644
index 0000000..8325d8e
--- /dev/null
+++ b/cachegrind/docs/Makefile
@@ -0,0 +1,341 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# cachegrind/docs/Makefile.  Generated from Makefile.in by configure.
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+
+pkgdatadir = $(datadir)/valgrind
+pkglibdir = $(libdir)/valgrind
+pkgincludedir = $(includedir)/valgrind
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = x86_64-unknown-linux-gnu
+host_triplet = x86_64-unknown-linux-gnu
+subdir = cachegrind/docs
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+SOURCES =
+DIST_SOURCES =
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = ${SHELL} /home/steph/compile/valgrind/missing --run aclocal-1.10
+AMTAR = ${SHELL} /home/steph/compile/valgrind/missing --run tar
+AR = /usr/bin/ar
+AUTOCONF = ${SHELL} /home/steph/compile/valgrind/missing --run autoconf
+AUTOHEADER = ${SHELL} /home/steph/compile/valgrind/missing --run autoheader
+AUTOMAKE = ${SHELL} /home/steph/compile/valgrind/missing --run automake-1.10
+AWK = gawk
+BOOST_CFLAGS = 
+BOOST_LIBS = -lboost_thread-mt -m64
+CC = gcc
+CCAS = gcc
+CCASDEPMODE = depmode=gcc3
+CCASFLAGS = -Wno-long-long
+CCDEPMODE = depmode=gcc3
+CFLAGS = -Wno-long-long -Wno-pointer-sign -Wdeclaration-after-statement -fno-stack-protector
+CPP = gcc -E
+CPPFLAGS = 
+CXX = g++
+CXXDEPMODE = depmode=gcc3
+CXXFLAGS = -g -O2
+CYGPATH_W = echo
+DEFAULT_SUPP = exp-ptrcheck.supp xfree-3.supp xfree-4.supp glibc-2.X-drd.supp glibc-2.34567-NPTL-helgrind.supp glibc-2.X.supp 
+DEFS = -DHAVE_CONFIG_H
+DEPDIR = .deps
+DIFF = diff -u
+DISTCHECK_CONFIGURE_FLAGS = --with-vex=$(top_srcdir)/VEX
+ECHO_C = 
+ECHO_N = -n
+ECHO_T = 
+EGREP = /bin/grep -E
+EXEEXT = 
+FLAG_FNO_STACK_PROTECTOR = -fno-stack-protector
+FLAG_M32 = -m32
+FLAG_M64 = -m64
+FLAG_MAIX32 = 
+FLAG_MAIX64 = 
+FLAG_MMMX = -mmmx
+FLAG_MSSE = -msse
+FLAG_UNLIMITED_INLINE_UNIT_GROWTH = --param inline-unit-growth=900
+FLAG_WDECL_AFTER_STMT = -Wdeclaration-after-statement
+FLAG_W_EXTRA = -Wextra
+FLAG_W_NO_FORMAT_ZERO_LENGTH = -Wno-format-zero-length
+GDB = /usr/bin/gdb
+GLIBC_VERSION = 2.8
+GREP = /bin/grep
+INSTALL = /usr/bin/install -c
+INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_SCRIPT = ${INSTALL}
+INSTALL_STRIP_PROGRAM = $(install_sh) -c -s
+LDFLAGS = 
+LIBOBJS = 
+LIBS = 
+LN_S = ln -s
+LTLIBOBJS = 
+MAINT = #
+MAKEINFO = ${SHELL} /home/steph/compile/valgrind/missing --run makeinfo
+MKDIR_P = /bin/mkdir -p
+MPI_CC = mpicc
+OBJEXT = o
+PACKAGE = valgrind
+PACKAGE_BUGREPORT = valgrind-users@lists.sourceforge.net
+PACKAGE_NAME = Valgrind
+PACKAGE_STRING = Valgrind 3.5.0.SVN
+PACKAGE_TARNAME = valgrind
+PACKAGE_VERSION = 3.5.0.SVN
+PATH_SEPARATOR = :
+PERL = /usr/bin/perl
+PKG_CONFIG = /usr/bin/pkg-config
+PREFERRED_STACK_BOUNDARY = 
+QTCORE_CFLAGS = -DQT_SHARED -I/usr/include/QtCore  
+QTCORE_LIBS = -lQtCore  
+RANLIB = ranlib
+SET_MAKE = 
+SHELL = /bin/sh
+STRIP = 
+VALT_LOAD_ADDRESS = 0x38000000
+VERSION = 3.5.0.SVN
+VEX_DIR = $(top_srcdir)/VEX
+VGCONF_ARCH_PRI = amd64
+VGCONF_OS = linux
+VGCONF_PLATFORM_PRI_CAPS = AMD64_LINUX
+VGCONF_PLATFORM_SEC_CAPS = 
+abs_builddir = /home/steph/compile/valgrind/cachegrind/docs
+abs_srcdir = /home/steph/compile/valgrind/cachegrind/docs
+abs_top_builddir = /home/steph/compile/valgrind
+abs_top_srcdir = /home/steph/compile/valgrind
+ac_ct_CC = gcc
+ac_ct_CXX = g++
+am__include = include
+am__leading_dot = .
+am__quote = 
+am__tar = ${AMTAR} chof - "$$tardir"
+am__untar = ${AMTAR} xf -
+bindir = ${exec_prefix}/bin
+build = x86_64-unknown-linux-gnu
+build_alias = 
+build_cpu = x86_64
+build_os = linux-gnu
+build_vendor = unknown
+builddir = .
+datadir = ${datarootdir}
+datarootdir = ${prefix}/share
+docdir = ${datarootdir}/doc/${PACKAGE_TARNAME}
+dvidir = ${docdir}
+exec_prefix = ${prefix}
+host = x86_64-unknown-linux-gnu
+host_alias = 
+host_cpu = x86_64
+host_os = linux-gnu
+host_vendor = unknown
+htmldir = ${docdir}
+includedir = ${prefix}/include
+infodir = ${datarootdir}/info
+install_sh = $(SHELL) /home/steph/compile/valgrind/install-sh
+libdir = ${exec_prefix}/lib
+libexecdir = ${exec_prefix}/libexec
+localedir = ${datarootdir}/locale
+localstatedir = ${prefix}/var
+mandir = ${datarootdir}/man
+mkdir_p = /bin/mkdir -p
+oldincludedir = /usr/include
+pdfdir = ${docdir}
+prefix = /usr/local
+program_transform_name = s,x,x,
+psdir = ${docdir}
+sbindir = ${exec_prefix}/sbin
+sharedstatedir = ${prefix}/com
+srcdir = .
+sysconfdir = ${prefix}/etc
+target_alias = 
+top_builddir = ../..
+top_srcdir = ../..
+EXTRA_DIST = cg-manual.xml
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  cachegrind/docs/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  cachegrind/docs/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: # $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): # $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic distclean \
+	distclean-generic distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic pdf pdf-am ps ps-am uninstall uninstall-am
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/cachegrind/docs/Makefile.am b/cachegrind/docs/Makefile.am
new file mode 100644
index 0000000..8e61709
--- /dev/null
+++ b/cachegrind/docs/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST = cg-manual.xml
diff --git a/cachegrind/docs/Makefile.in b/cachegrind/docs/Makefile.in
new file mode 100644
index 0000000..54da31e
--- /dev/null
+++ b/cachegrind/docs/Makefile.in
@@ -0,0 +1,341 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = cachegrind/docs
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+SOURCES =
+DIST_SOURCES =
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CFLAGS = @BOOST_CFLAGS@
+BOOST_LIBS = @BOOST_LIBS@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFAULT_SUPP = @DEFAULT_SUPP@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DIFF = @DIFF@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FLAG_FNO_STACK_PROTECTOR = @FLAG_FNO_STACK_PROTECTOR@
+FLAG_M32 = @FLAG_M32@
+FLAG_M64 = @FLAG_M64@
+FLAG_MAIX32 = @FLAG_MAIX32@
+FLAG_MAIX64 = @FLAG_MAIX64@
+FLAG_MMMX = @FLAG_MMMX@
+FLAG_MSSE = @FLAG_MSSE@
+FLAG_UNLIMITED_INLINE_UNIT_GROWTH = @FLAG_UNLIMITED_INLINE_UNIT_GROWTH@
+FLAG_WDECL_AFTER_STMT = @FLAG_WDECL_AFTER_STMT@
+FLAG_W_EXTRA = @FLAG_W_EXTRA@
+FLAG_W_NO_FORMAT_ZERO_LENGTH = @FLAG_W_NO_FORMAT_ZERO_LENGTH@
+GDB = @GDB@
+GLIBC_VERSION = @GLIBC_VERSION@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPI_CC = @MPI_CC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+PKG_CONFIG = @PKG_CONFIG@
+PREFERRED_STACK_BOUNDARY = @PREFERRED_STACK_BOUNDARY@
+QTCORE_CFLAGS = @QTCORE_CFLAGS@
+QTCORE_LIBS = @QTCORE_LIBS@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VALT_LOAD_ADDRESS = @VALT_LOAD_ADDRESS@
+VERSION = @VERSION@
+VEX_DIR = @VEX_DIR@
+VGCONF_ARCH_PRI = @VGCONF_ARCH_PRI@
+VGCONF_OS = @VGCONF_OS@
+VGCONF_PLATFORM_PRI_CAPS = @VGCONF_PLATFORM_PRI_CAPS@
+VGCONF_PLATFORM_SEC_CAPS = @VGCONF_PLATFORM_SEC_CAPS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+EXTRA_DIST = cg-manual.xml
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  cachegrind/docs/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  cachegrind/docs/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic distclean \
+	distclean-generic distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic pdf pdf-am ps ps-am uninstall uninstall-am
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/cachegrind/docs/cg-manual.xml b/cachegrind/docs/cg-manual.xml
new file mode 100644
index 0000000..512eeb4
--- /dev/null
+++ b/cachegrind/docs/cg-manual.xml
@@ -0,0 +1,1349 @@
+<?xml version="1.0"?> <!-- -*- sgml -*- -->
+<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+  "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"
+[ <!ENTITY % vg-entities SYSTEM "../../docs/xml/vg-entities.xml"> %vg-entities; ]>
+
+
+<chapter id="cg-manual" xreflabel="Cachegrind: a cache and branch profiler">
+<title>Cachegrind: a cache and branch profiler</title>
+
+<sect1 id="cg-manual.cache" xreflabel="Cache profiling">
+<title>Cache and branch profiling</title>
+
+<para>To use this tool, you must specify
+<computeroutput>--tool=cachegrind</computeroutput> on the
+Valgrind command line.</para>
+
+<para>Cachegrind is a tool for finding places where programs
+interact badly with typical modern superscalar processors
+and run slowly as a result.
+In particular, it will do a cache simulation of your program,
+and optionally a branch-predictor simulation, and can
+then annotate your source line-by-line with the number of cache
+misses and branch mispredictions.  The following statistics are 
+collected:</para>
+<itemizedlist>
+  <listitem>
+    <para>L1 instruction cache reads and misses;</para>
+  </listitem>
+  <listitem>
+    <para>L1 data cache reads and read misses, writes and write
+    misses;</para>
+  </listitem>
+  <listitem>
+    <para>L2 unified cache reads and read misses, writes and
+    write misses.</para>
+  </listitem>
+  <listitem>
+    <para>Conditional branches and mispredicted conditional branches.</para>
+  </listitem>
+  <listitem>
+    <para>Indirect branches and mispredicted indirect branches.  An
+    indirect branch is a jump or call to a destination only known at
+    run time.</para>
+  </listitem>
+</itemizedlist>
+
+<para>On a modern machine, an L1 miss will typically cost
+around 10 cycles, an L2 miss can cost as much as 200
+cycles, and a mispredicted branch costs in the region of 10
+to 30 cycles.  Detailed cache and branch profiling can be very useful
+for improving the performance of your program.</para>
+
+<para>Also, since one instruction cache read is performed per
+instruction executed, you can find out how many instructions are
+executed per line, which can be useful for traditional profiling
+and test coverage.</para>
+
+<para>Branch profiling is not enabled by default.  To use it, you must
+additionally specify <computeroutput>--branch-sim=yes</computeroutput>
+on the command line.</para>
+
+
+<sect2 id="cg-manual.overview" xreflabel="Overview">
+<title>Overview</title>
+
+<para>First off, as for normal Valgrind use, you probably want to
+compile with debugging info (the
+<computeroutput>-g</computeroutput> flag).  But by contrast with
+normal Valgrind use, you probably <command>do</command> want to turn
+optimisation on, since you should profile your program as it will
+be normally run.</para>
+
+<para>The two steps are:</para>
+<orderedlist>
+  <listitem>
+    <para>Run your program with <computeroutput>valgrind
+    --tool=cachegrind</computeroutput> in front of the normal
+    command line invocation.  When the program finishes,
+    Cachegrind will print summary cache statistics. It also
+    collects line-by-line information in a file
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>, where
+    <computeroutput>&lt;pid&gt;</computeroutput> is the program's process
+    ID.</para>
+
+    <para>Branch prediction statistics are not collected by default.
+    To do so, add the flag
+    <computeroutput>--branch-sim=yes</computeroutput>.
+    </para>
+
+    <para>This step should be done every time you want to collect
+    information about a new program, a changed program, or about
+    the same program with different input.</para>
+  </listitem>
+
+  <listitem>
+    <para>Generate a function-by-function summary, and possibly
+    annotate source files, using the supplied
+    cg_annotate program. Source
+    files to annotate can be specified manually on
+    the command line, or "interesting" source files can be
+    annotated automatically with the
+    <computeroutput>--auto=yes</computeroutput> option.  You can
+    annotate C/C++ files or assembly language files equally
+    easily.</para>
+
+    <para>This step can be performed as many times as you like
+    for each Step 1.  You may want to do multiple annotations
+    showing different information each time.</para>
+  </listitem>
+
+</orderedlist>
+
+<para>As an optional intermediate step, you can use the supplied
+cg_merge program to sum together the
+outputs of multiple Cachegrind runs, into a single file which you then
+use as the input for cg_annotate.</para>
+
+<para>These steps are described in detail in the following
+sections.</para>
+
+</sect2>
+
+
+<sect2 id="cache-sim" xreflabel="Cache simulation specifics">
+<title>Cache simulation specifics</title>
+
+<para>Cachegrind simulates a machine with independent
+first level instruction and data caches (I1 and D1), backed by a
+unified second level cache (L2).  This configuration is used by almost
+all modern machines.  Some old Cyrix CPUs had a unified I and D L1
+cache, but they are ancient history now.</para>
+
+<para>Specific characteristics of the simulation are as
+follows:</para>
+
+<itemizedlist>
+
+  <listitem>
+    <para>Write-allocate: when a write miss occurs, the block
+    written to is brought into the D1 cache.  Most modern caches
+    have this property.</para>
+  </listitem>
+
+  <listitem>
+    <para>Bit-selection hash function: the set of line(s) in the cache
+    to which a memory block maps is chosen by the middle bits
+    M--(M+N-1) of the byte address, where:</para>
+    <itemizedlist>
+      <listitem>
+        <para>line size = 2^M bytes</para>
+      </listitem>
+      <listitem>
+        <para>(cache size / line size / associativity) = 2^N sets</para>
+      </listitem>
+    </itemizedlist> 
+  </listitem>
+
+  <listitem>
+    <para>Inclusive L2 cache: the L2 cache typically replicates all
+    the entries of the L1 caches, because fetching into L1 involves
+    fetching into L2 first (this does not guarantee strict inclusiveness,
+    as lines evicted from L2 still could reside in L1).  This is
+    standard on Pentium chips, but AMD Opterons, Athlons and Durons
+    use an exclusive L2 cache that only holds
+    blocks evicted from L1.  Ditto most modern VIA CPUs.</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>The cache configuration simulated (cache size,
+associativity and line size) is determined automagically using
+the CPUID instruction.  If you have an old machine that (a)
+doesn't support the CPUID instruction, or (b) supports it in an
+early incarnation that doesn't give any cache information, then
+Cachegrind will fall back to using a default configuration (that
+of a model 3/4 Athlon).  Cachegrind will tell you if this
+happens.  You can manually specify one, two or all three levels
+(I1/D1/L2) of the cache from the command line using the
+<computeroutput>--I1</computeroutput>,
+<computeroutput>--D1</computeroutput> and
+<computeroutput>--L2</computeroutput> options.
+For cache parameters to be valid for simulation, the number
+of sets (with associativity being the number of cache lines in
+each set) has to be a power of two.</para>
+
+<para>On PowerPC platforms
+Cachegrind cannot automatically 
+determine the cache configuration, so you will 
+need to specify it with the
+<computeroutput>--I1</computeroutput>,
+<computeroutput>--D1</computeroutput> and
+<computeroutput>--L2</computeroutput> options.</para>
+
+
+<para>Other noteworthy behaviour:</para>
+
+<itemizedlist>
+  <listitem>
+    <para>References that straddle two cache lines are treated as
+    follows:</para>
+    <itemizedlist>
+      <listitem>
+        <para>If both blocks hit --&gt; counted as one hit</para>
+      </listitem>
+      <listitem>
+        <para>If one block hits, the other misses --&gt; counted
+        as one miss.</para>
+      </listitem>
+      <listitem>
+        <para>If both blocks miss --&gt; counted as one miss (not
+        two)</para>
+      </listitem>
+    </itemizedlist>
+  </listitem>
+
+  <listitem>
+    <para>Instructions that modify a memory location
+    (eg. <computeroutput>inc</computeroutput> and
+    <computeroutput>dec</computeroutput>) are counted as doing
+    just a read, ie. a single data reference.  This may seem
+    strange, but since the write can never cause a miss (the read
+    guarantees the block is in the cache) it's not very
+    interesting.</para>
+
+    <para>Thus it measures not the number of times the data cache
+    is accessed, but the number of times a data cache miss could
+    occur.</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>If you are interested in simulating a cache with different
+properties, it is not particularly hard to write your own cache
+simulator, or to modify the existing ones in
+<computeroutput>cg_sim.c</computeroutput>. We'd be
+interested to hear from anyone who does.</para>
+
+</sect2>
+
+
+<sect2 id="branch-sim" xreflabel="Branch simulation specifics">
+<title>Branch simulation specifics</title>
+
+<para>Cachegrind simulates branch predictors intended to be
+typical of mainstream desktop/server processors of around 2004.</para>
+
+<para>Conditional branches are predicted using an array of 16384 2-bit
+saturating counters.  The array index used for a branch instruction is
+computed partly from the low-order bits of the branch instruction's
+address and partly using the taken/not-taken behaviour of the last few
+conditional branches.  As a result the predictions for any specific
+branch depend both on its own history and the behaviour of previous
+branches.  This is a standard technique for improving prediction
+accuracy.</para>
+
+<para>For indirect branches (that is, jumps to unknown destinations)
+Cachegrind uses a simple branch target address predictor.  Targets are
+predicted using an array of 512 entries indexed by the low order 9
+bits of the branch instruction's address.  Each branch is predicted to
+jump to the same address it did last time.  Any other behaviour causes
+a mispredict.</para>
+
+<para>More recent processors have better branch predictors, in
+particular better indirect branch predictors.  Cachegrind's predictor
+design is deliberately conservative so as to be representative of the
+large installed base of processors which pre-date widespread
+deployment of more sophisticated indirect branch predictors.  In
+particular, late model Pentium 4s (Prescott), Pentium M, Core and Core
+2 have more sophisticated indirect branch predictors than modelled by
+Cachegrind.  </para>
+
+<para>Cachegrind does not simulate a return stack predictor.  It
+assumes that processors perfectly predict function return addresses,
+an assumption which is probably close to being true.</para>
+
+<para>See Hennessy and Patterson's classic text "Computer
+Architecture: A Quantitative Approach", 4th edition (2007), Section
+2.3 (pages 80-89) for background on modern branch predictors.</para>
+
+</sect2>
+
+
+</sect1>
+
+
+
+<sect1 id="cg-manual.profile" xreflabel="Profiling programs">
+<title>Profiling programs</title>
+
+<para>To gather cache profiling information about the program
+<computeroutput>ls -l</computeroutput>, invoke Cachegrind like
+this:</para>
+
+<programlisting><![CDATA[
+valgrind --tool=cachegrind ls -l]]></programlisting>
+
+<para>The program will execute (slowly).  Upon completion,
+summary statistics that look like this will be printed:</para>
+
+<programlisting><![CDATA[
+==31751== I   refs:      27,742,716
+==31751== I1  misses:           276
+==31751== L2i misses:           275
+==31751== I1  miss rate:        0.0%
+==31751== L2i miss rate:        0.0%
+==31751== 
+==31751== D   refs:      15,430,290  (10,955,517 rd + 4,474,773 wr)
+==31751== D1  misses:        41,185  (    21,905 rd +    19,280 wr)
+==31751== L2d misses:        23,085  (     3,987 rd +    19,098 wr)
+==31751== D1  miss rate:        0.2% (       0.1%   +       0.4%)
+==31751== L2d miss rate:        0.1% (       0.0%   +       0.4%)
+==31751== 
+==31751== L2 misses:         23,360  (     4,262 rd +    19,098 wr)
+==31751== L2 miss rate:         0.0% (       0.0%   +       0.4%)]]></programlisting>
+
+<para>Cache accesses for instruction fetches are summarised
+first, giving the number of fetches made (this is the number of
+instructions executed, which can be useful to know in its own
+right), the number of I1 misses, and the number of L2 instruction
+(<computeroutput>L2i</computeroutput>) misses.</para>
+
+<para>Cache accesses for data follow. The information is similar
+to that of the instruction fetches, except that the values are
+also shown split between reads and writes (note each row's
+<computeroutput>rd</computeroutput> and
+<computeroutput>wr</computeroutput> values add up to the row's
+total).</para>
+
+<para>Combined instruction and data figures for the L2 cache
+follow that.</para>
+
+
+
+<sect2 id="cg-manual.outputfile" xreflabel="Output file">
+<title>Output file</title>
+
+<para>As well as printing summary information, Cachegrind also
+writes line-by-line cache profiling information to a user-specified
+file.  By default this file is named
+<computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>.  This file
+is human-readable, but is intended to be interpreted by the accompanying
+program cg_annotate, described in the next section.</para>
+
+<para>Things to note about the
+<computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>
+file:</para>
+
+<itemizedlist>
+  <listitem>
+    <para>It is written every time Cachegrind is run, and will
+    overwrite any existing
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>
+    in the current directory (but that won't happen very often
+    because it takes some time for process ids to be
+    recycled).</para>
+  </listitem>
+  <listitem>
+    <para>To use an output file name other than the default
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>,
+    use the <computeroutput>--cachegrind-out-file</computeroutput>
+    switch.</para>
+  </listitem>
+  <listitem>
+    <para>It can be big: <computeroutput>ls -l</computeroutput>
+    generates a file of about 350KB.  Browsing a few files and
+    web pages with a Konqueror built with full debugging
+    information generates a file of around 15 MB.</para>
+  </listitem>
+</itemizedlist>
+
+<para>The default <computeroutput>.&lt;pid&gt;</computeroutput> suffix
+on the output file name serves two purposes.  Firstly, it means you 
+don't have to rename old log files that you don't want to overwrite.  
+Secondly, and more importantly, it allows correct profiling with the
+<computeroutput>--trace-children=yes</computeroutput> option of
+programs that spawn child processes.</para>
+
+</sect2>
+
+
+
+<sect2 id="cg-manual.cgopts" xreflabel="Cachegrind options">
+<title>Cachegrind options</title>
+
+<!-- start of xi:include in the manpage -->
+<para id="cg.opts.para">Using command line options, you can 
+manually specify the I1/D1/L2 cache
+configuration to simulate.  For each cache, you can specify the
+size, associativity and line size.  The size and line size
+are measured in bytes.  The three items
+must be comma-separated, but with no spaces, eg:
+<literallayout>    valgrind --tool=cachegrind --I1=65536,2,64</literallayout>
+
+You can specify one, two or three of the I1/D1/L2 caches.  Any level not
+manually specified will be simulated using the configuration found in
+the normal way (via the CPUID instruction for automagic cache
+configuration, or failing that, via defaults).</para>
+
+<para>Cache-simulation specific options are:</para>
+
+<variablelist id="cg.opts.list">
+
+  <varlistentry id="opt.I1" xreflabel="--I1">
+    <term>
+      <option><![CDATA[--I1=<size>,<associativity>,<line size> ]]></option>
+    </term>
+    <listitem>
+      <para>Specify the size, associativity and line size of the level 1
+      instruction cache.  </para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.D1" xreflabel="--D1">
+    <term>
+      <option><![CDATA[--D1=<size>,<associativity>,<line size> ]]></option>
+    </term>
+    <listitem>
+      <para>Specify the size, associativity and line size of the level 1
+      data cache.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.L2" xreflabel="--L2">
+    <term>
+      <option><![CDATA[--L2=<size>,<associativity>,<line size> ]]></option>
+    </term>
+    <listitem>
+      <para>Specify the size, associativity and line size of the level 2
+      cache.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.cachegrind-out-file" xreflabel="--cachegrind-out-file">
+    <term>
+      <option><![CDATA[--cachegrind-out-file=<file> ]]></option>
+    </term>
+    <listitem>
+      <para>Write the profile data to 
+            <computeroutput>file</computeroutput> rather than to the default
+            output file,
+            <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>.  The
+            <option>%p</option> and <option>%q</option> format specifiers
+            can be used to embed the process ID and/or the contents of an
+            environment variable in the name, as is the case for the core
+            option <option>--log-file</option>.  See <link
+            linkend="manual-core.basicopts">here</link> for details.
+      </para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.cache-sim" xreflabel="--cache-sim">
+    <term>
+      <option><![CDATA[--cache-sim=no|yes [yes] ]]></option>
+    </term>
+    <listitem>
+      <para>Enables or disables collection of cache access and miss
+            counts.</para>
+    </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.branch-sim" xreflabel="--branch-sim">
+    <term>
+      <option><![CDATA[--branch-sim=no|yes [no] ]]></option>
+    </term>
+    <listitem>
+      <para>Enables or disables collection of branch instruction and
+            misprediction counts.  By default this is disabled as it
+            slows Cachegrind down by approximately 25%.  Note that you
+            cannot specify <computeroutput>--cache-sim=no</computeroutput>
+            and <computeroutput>--branch-sim=no</computeroutput>
+            together, as that would leave Cachegrind with no
+            information to collect.</para>
+    </listitem>
+  </varlistentry>
+
+</variablelist>
+<!-- end of xi:include in the manpage -->
+
+</sect2>
+
+
+  
+<sect2 id="cg-manual.annotate" xreflabel="Annotating C/C++ programs">
+<title>Annotating C/C++ programs</title>
+
+<para>Before using cg_annotate,
+it is worth widening your window to be at least 120-characters
+wide if possible, as the output lines can be quite long.</para>
+
+<para>To get a function-by-function summary, run <computeroutput>cg_annotate
+&lt;filename&gt;</computeroutput> on a Cachegrind output file.</para>
+
+<para>The output looks like this:</para>
+
+<programlisting><![CDATA[
+--------------------------------------------------------------------------------
+I1 cache:              65536 B, 64 B, 2-way associative
+D1 cache:              65536 B, 64 B, 2-way associative
+L2 cache:              262144 B, 64 B, 8-way associative
+Command:               concord vg_to_ucode.c
+Events recorded:       Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
+Events shown:          Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
+Event sort order:      Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
+Threshold:             99%
+Chosen for annotation:
+Auto-annotation:       off
+
+--------------------------------------------------------------------------------
+Ir         I1mr I2mr Dr         D1mr   D2mr  Dw        D1mw   D2mw
+--------------------------------------------------------------------------------
+27,742,716  276  275 10,955,517 21,905 3,987 4,474,773 19,280 19,098  PROGRAM TOTALS
+
+--------------------------------------------------------------------------------
+Ir        I1mr I2mr Dr        D1mr  D2mr  Dw        D1mw   D2mw    file:function
+--------------------------------------------------------------------------------
+8,821,482    5    5 2,242,702 1,621    73 1,794,230      0      0  getc.c:_IO_getc
+5,222,023    4    4 2,276,334    16    12   875,959      1      1  concord.c:get_word
+2,649,248    2    2 1,344,810 7,326 1,385         .      .      .  vg_main.c:strcmp
+2,521,927    2    2   591,215     0     0   179,398      0      0  concord.c:hash
+2,242,740    2    2 1,046,612   568    22   448,548      0      0  ctype.c:tolower
+1,496,937    4    4   630,874 9,000 1,400   279,388      0      0  concord.c:insert
+  897,991   51   51   897,831    95    30        62      1      1  ???:???
+  598,068    1    1   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__flockfile
+  598,068    0    0   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__funlockfile
+  598,024    4    4   213,580    35    16   149,506      0      0  vg_clientmalloc.c:malloc
+  446,587    1    1   215,973 2,167   430   129,948 14,057 13,957  concord.c:add_existing
+  341,760    2    2   128,160     0     0   128,160      0      0  vg_clientmalloc.c:vg_trap_here_WRAPPER
+  320,782    4    4   150,711   276     0    56,027     53     53  concord.c:init_hash_table
+  298,998    1    1   106,785     0     0    64,071      1      1  concord.c:create
+  149,518    0    0   149,516     0     0         1      0      0  ???:tolower@@GLIBC_2.0
+  149,518    0    0   149,516     0     0         1      0      0  ???:fgetc@@GLIBC_2.0
+   95,983    4    4    38,031     0     0    34,409  3,152  3,150  concord.c:new_word_node
+   85,440    0    0    42,720     0     0    21,360      0      0  vg_clientmalloc.c:vg_bogus_epilogue]]></programlisting>
+
+
+<para>First up is a summary of the annotation options:</para>
+                    
+<itemizedlist>
+
+  <listitem>
+    <para>I1 cache, D1 cache, L2 cache: cache configuration.  So
+    you know the configuration with which these results were
+    obtained.</para>
+  </listitem>
+
+  <listitem>
+    <para>Command: the command line invocation of the program
+      under examination.</para>
+  </listitem>
+
+  <listitem>
+   <para>Events recorded: event abbreviations are:</para>
+   <itemizedlist>
+     <listitem>
+       <para><computeroutput>Ir</computeroutput>: I cache reads
+       (ie. instructions executed)</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>I1mr</computeroutput>: I1 cache read
+       misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>I2mr</computeroutput>: L2 cache
+       instruction read misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Dr</computeroutput>: D cache reads
+       (ie. memory reads)</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>D1mr</computeroutput>: D1 cache read
+       misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>D2mr</computeroutput>: L2 cache data
+       read misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Dw</computeroutput>: D cache writes
+       (ie. memory writes)</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>D1mw</computeroutput>: D1 cache write
+       misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>D2mw</computeroutput>: L2 cache data
+       write misses</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Bc</computeroutput>: Conditional branches
+       executed</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Bcm</computeroutput>: Conditional branches
+       mispredicted</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Bi</computeroutput>: Indirect branches
+       executed</para>
+     </listitem>
+     <listitem>
+       <para><computeroutput>Bim</computeroutput>: Indirect branches
+       mispredicted</para>
+     </listitem>
+   </itemizedlist>
+
+   <para>Note that D1 total misses is given by
+   <computeroutput>D1mr</computeroutput> +
+   <computeroutput>D1mw</computeroutput>, and that L2 total
+   misses is given by <computeroutput>I2mr</computeroutput> +
+   <computeroutput>D2mr</computeroutput> +
+   <computeroutput>D2mw</computeroutput>.</para>
+ </listitem>
+
+ <listitem>
+   <para>Events shown: the events shown, which is a subset of the events
+   gathered.  This can be adjusted with the
+   <computeroutput>--show</computeroutput> option.</para>
+  </listitem>
+
+  <listitem>
+    <para>Event sort order: the sort order in which functions are
+    shown.  For example, in this case the functions are sorted
+    from highest <computeroutput>Ir</computeroutput> counts to
+    lowest.  If two functions have identical
+    <computeroutput>Ir</computeroutput> counts, they will then be
+    sorted by <computeroutput>I1mr</computeroutput> counts, and
+    so on.  This order can be adjusted with the
+    <computeroutput>--sort</computeroutput> option.</para>
+
+    <para>Note that this dictates the order the functions appear.
+    It is <command>not</command> the order in which the columns
+    appear; that is dictated by the "events shown" line (and can
+    be changed with the <computeroutput>--show</computeroutput>
+    option).</para>
+  </listitem>
+
+  <listitem>
+    <para>Threshold: cg_annotate
+    by default omits functions that cause very low counts
+    to avoid drowning you in information.  In this case,
+    cg_annotate shows summaries for the functions that account for
+    99% of the <computeroutput>Ir</computeroutput> counts;
+    <computeroutput>Ir</computeroutput> is chosen as the
+    threshold event since it is the primary sort event.  The
+    threshold can be adjusted with the
+    <computeroutput>--threshold</computeroutput>
+    option.</para>
+  </listitem>
+
+  <listitem>
+    <para>Chosen for annotation: names of files specified
+    manually for annotation; in this case none.</para>
+  </listitem>
+
+  <listitem>
+    <para>Auto-annotation: whether auto-annotation was requested
+    via the <computeroutput>--auto=yes</computeroutput>
+    option. In this case no.</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>Then follow summary statistics for the whole
+program. These are similar to the summary provided when running
+<computeroutput>valgrind --tool=cachegrind</computeroutput>.</para>
+  
+<para>Then follow function-by-function statistics. Each function
+is identified by a
+<computeroutput>file_name:function_name</computeroutput> pair. If
+a column contains only a dot it means the function never performs
+that event (eg. the third row shows that
+<computeroutput>strcmp()</computeroutput> contains no
+instructions that write to memory). The name
+<computeroutput>???</computeroutput> is used if the file name
+and/or function name could not be determined from debugging
+information. If most of the entries have the form
+<computeroutput>???:???</computeroutput> the program probably
+wasn't compiled with <computeroutput>-g</computeroutput>.  If any
+code was invalidated (either due to self-modifying code or
+unloading of shared objects) its counts are aggregated into a
+single cost centre written as
+<computeroutput>(discarded):(discarded)</computeroutput>.</para>
+
+<para>It is worth noting that functions will come both from
+the profiled program (eg. <filename>concord.c</filename>)
+and from libraries (eg. <filename>getc.c</filename>).</para>
+
+<para>There are two ways to annotate source files -- by choosing
+them manually, or with the
+<computeroutput>--auto=yes</computeroutput> option. To do it
+manually, just specify the filenames as additional arguments to
+cg_annotate. For example, the
+output from running <filename>cg_annotate &lt;filename&gt;
+concord.c</filename> for our example produces the same output as above
+followed by an annotated version of <filename>concord.c</filename>, a
+section of which looks like:</para>
+
+<programlisting><![CDATA[
+--------------------------------------------------------------------------------
+-- User-annotated source: concord.c
+--------------------------------------------------------------------------------
+Ir        I1mr I2mr Dr      D1mr  D2mr  Dw      D1mw   D2mw
+
+[snip]
+
+        .    .    .       .     .     .       .      .      .  void init_hash_table(char *file_name, Word_Node *table[])
+        3    1    1       .     .     .       1      0      0  {
+        .    .    .       .     .     .       .      .      .      FILE *file_ptr;
+        .    .    .       .     .     .       .      .      .      Word_Info *data;
+        1    0    0       .     .     .       1      1      1      int line = 1, i;
+        .    .    .       .     .     .       .      .      .
+        5    0    0       .     .     .       3      0      0      data = (Word_Info *) create(sizeof(Word_Info));
+        .    .    .       .     .     .       .      .      .
+    4,991    0    0   1,995     0     0     998      0      0      for (i = 0; i < TABLE_SIZE; i++)
+    3,988    1    1   1,994     0     0     997     53     52          table[i] = NULL;
+        .    .    .       .     .     .       .      .      .
+        .    .    .       .     .     .       .      .      .      /* Open file, check it. */
+        6    0    0       1     0     0       4      0      0      file_ptr = fopen(file_name, "r");
+        2    0    0       1     0     0       .      .      .      if (!(file_ptr)) {
+        .    .    .       .     .     .       .      .      .          fprintf(stderr, "Couldn't open '%s'.\n", file_name);
+        1    1    1       .     .     .       .      .      .          exit(EXIT_FAILURE);
+        .    .    .       .     .     .       .      .      .      }
+        .    .    .       .     .     .       .      .      .
+  165,062    1    1  73,360     0     0  91,700      0      0      while ((line = get_word(data, line, file_ptr)) != EOF)
+  146,712    0    0  73,356     0     0  73,356      0      0          insert(data->word, data->line, table);
+        .    .    .       .     .     .       .      .      .
+        4    0    0       1     0     0       2      0      0      free(data);
+        4    0    0       1     0     0       2      0      0      fclose(file_ptr);
+        3    0    0       2     0     0       .      .      .  }]]></programlisting>
+
+<para>(Although column widths are automatically minimised, a wide
+terminal is clearly useful.)</para>
+  
+<para>Each source file is clearly marked
+(<computeroutput>User-annotated source</computeroutput>) as
+having been chosen manually for annotation.  If the file was
+found in one of the directories specified with the
+<computeroutput>-I / --include</computeroutput> option, the directory
+and file are both given.</para>
+
+<para>Each line is annotated with its event counts.  Events not
+applicable for a line are represented by a dot.  This is useful
+for distinguishing between an event which cannot happen, and one
+which can but did not.</para>
+
+<para>Sometimes only a small section of a source file is
+executed.  To minimise uninteresting output, Cachegrind only shows
+annotated lines and lines within a small distance of annotated
+lines.  Gaps are marked with the line numbers so you know which
+part of a file the shown code comes from, eg:</para>
+
+<programlisting><![CDATA[
+(figures and code for line 704)
+-- line 704 ----------------------------------------
+-- line 878 ----------------------------------------
+(figures and code for line 878)]]></programlisting>
+
+<para>The amount of context to show around annotated lines is
+controlled by the <computeroutput>--context</computeroutput>
+option.</para>
+
+<para>To get automatic annotation, run
+<computeroutput>cg_annotate &lt;filename&gt; --auto=yes</computeroutput>.
+cg_annotate will automatically annotate every source file it can
+find that is mentioned in the function-by-function summary.
+Therefore, the files chosen for auto-annotation are affected by
+the <computeroutput>--sort</computeroutput> and
+<computeroutput>--threshold</computeroutput> options.  Each
+source file is clearly marked (<computeroutput>Auto-annotated
+source</computeroutput>) as being chosen automatically.  Any
+files that could not be found are mentioned at the end of the
+output, eg:</para>
+
+<programlisting><![CDATA[
+------------------------------------------------------------------
+The following files chosen for auto-annotation could not be found:
+------------------------------------------------------------------
+  getc.c
+  ctype.c
+  ../sysdeps/generic/lockfile.c]]></programlisting>
+
+<para>This is quite common for library files, since libraries are
+usually compiled with debugging information, but the source files
+are often not present on a system.  If a file is chosen for
+annotation <command>both</command> manually and automatically, it
+is marked as <computeroutput>User-annotated
+source</computeroutput>. Use the <computeroutput>-I /
+--include</computeroutput> option to tell Valgrind where to look
+for source files if the filenames found from the debugging
+information aren't specific enough.</para>
+
+<para>Beware that cg_annotate can take some time to digest large
+<computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> files,
+e.g. 30 seconds or more.  Also beware that auto-annotation can
+produce a lot of output if your program is large!</para>
+
+</sect2>
+
+
+<sect2 id="cg-manual.assembler" xreflabel="Annotating assembler programs">
+<title>Annotating assembly code programs</title>
+
+<para>Valgrind can annotate assembly code programs too, or annotate
+the assembly code generated for your C program.  Sometimes this is
+useful for understanding what is really happening when an
+interesting line of C code is translated into multiple
+instructions.</para>
+
+<para>To do this, you just need to assemble your
+<computeroutput>.s</computeroutput> files with assembly-level debug
+information.  You can use <computeroutput>gcc
+-S</computeroutput> to compile C/C++ programs to assembly code, and then
+<computeroutput>gcc -g</computeroutput> on the assembly code files to
+achieve this.  You can then profile and annotate the assembly code source
+files in the same way as C/C++ source files.</para>
+
+</sect2>
+
+<sect2 id="cg-manual.forkingprograms" xreflabel="Forking Programs">
+<title>Forking Programs</title>
+<para>If your program forks, the child will inherit all the profiling data that
+has been gathered for the parent.</para>
+
+<para>If the output file format string (controlled by
+<option>--cachegrind-out-file</option>) does not contain <option>%p</option>,
+then the outputs from the parent and child will be intermingled in a single
+output file, which will almost certainly make it unreadable by
+cg_annotate.</para>
+</sect2>
+
+
+</sect1>
+
+
+<sect1 id="cg-manual.annopts" xreflabel="cg_annotate options">
+<title>cg_annotate options</title>
+
+<itemizedlist>
+
+  <listitem>
+    <para><computeroutput>-h, --help</computeroutput></para>
+    <para><computeroutput>-v, --version</computeroutput></para>
+    <para>Help and version, as usual.</para>
+  </listitem>
+
+  <listitem id="sort">
+    <para><computeroutput>--sort=A,B,C</computeroutput> [default:
+    order in
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>]</para>
+    <para>Specifies the events upon which the sorting of the
+    function-by-function entries will be based.  Useful if you
+    want to concentrate on eg. I cache misses
+    (<computeroutput>--sort=I1mr,I2mr</computeroutput>), or D
+    cache misses
+    (<computeroutput>--sort=D1mr,D2mr</computeroutput>), or L2
+    misses
+    (<computeroutput>--sort=D2mr,I2mr</computeroutput>).</para>
+  </listitem>
+
+  <listitem id="show">
+    <para><computeroutput>--show=A,B,C</computeroutput> [default:
+    all, using order in
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput>]</para>
+    <para>Specifies which events to show (and the column
+    order). Default is to use all present in the
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> file (and
+    use the order in the file).</para>
+  </listitem>
+
+  <listitem id="threshold">
+    <para><computeroutput>--threshold=X</computeroutput>
+    [default: 99%]</para>
+    <para>Sets the threshold for the function-by-function
+    summary.  Functions are shown that account for more than X%
+    of the primary sort event.  If auto-annotating, also affects
+    which files are annotated.</para>
+      
+    <para>Note: thresholds can be set for more than one of the
+    events by appending any events for the
+    <computeroutput>--sort</computeroutput> option with a colon
+    and a number (no spaces, though).  E.g. if you want to see
+    the functions that cover 99% of L2 read misses and 99% of L2
+    write misses, use this option:</para>
+    <para><computeroutput>--sort=D2mr:99,D2mw:99</computeroutput></para>
+  </listitem>
+
+  <listitem id="auto">
+    <para><computeroutput>--auto=no</computeroutput> [default]</para>
+    <para><computeroutput>--auto=yes</computeroutput></para>
+    <para>When enabled, automatically annotates every file that
+    is mentioned in the function-by-function summary that can be
+    found.  Also gives a list of those that couldn't be found.</para>
+  </listitem>
+
+  <listitem id="context">
+    <para><computeroutput>--context=N</computeroutput> [default:
+    8]</para>
+    <para>Print N lines of context before and after each
+    annotated line.  Avoids printing large sections of source
+    files that were not executed.  Use a large number
+    (eg. 10,000) to show all source lines.</para>
+  </listitem>
+
+  <listitem id="include">
+    <para><computeroutput>-I&lt;dir&gt;,
+      --include=&lt;dir&gt;</computeroutput> [default: empty
+      string]</para>
+    <para>Adds a directory to the list in which to search for
+    files.  Multiple -I/--include options can be given to add
+    multiple directories.</para>
+  </listitem>
+
+</itemizedlist>
+  
+
+
+<sect2 id="cg-manual.annopts.warnings" xreflabel="Warnings">
+<title>Warnings</title>
+
+<para>There are a couple of situations in which
+cg_annotate issues warnings.</para>
+
+<itemizedlist>
+  <listitem>
+    <para>If a source file is more recent than the
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> file.
+    This is because the information in
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> is only
+    recorded with line numbers, so if the line numbers change at
+    all in the source (eg.  lines added, deleted, swapped), any
+    annotations will be incorrect.</para>
+  </listitem>
+  <listitem>
+    <para>If information is recorded about line numbers past the
+    end of a file.  This can be caused by the above problem,
+    ie. shortening the source file while using an old
+    <computeroutput>cachegrind.out.&lt;pid&gt;</computeroutput> file.  If
+    this happens, the figures for the bogus lines are printed
+    anyway (clearly marked as bogus) in case they are
+    important.</para>
+  </listitem>
+</itemizedlist>
+
+</sect2>
+
+
+
+<sect2 id="cg-manual.annopts.things-to-watch-out-for"
+       xreflabel="Things to watch out for">
+<title>Things to watch out for</title>
+
+<para>Some odd things that can occur during annotation:</para>
+
+<itemizedlist>
+  <listitem>
+    <para>If annotating at the assembler level, you might see
+    something like this:</para>
+<programlisting><![CDATA[
+      1    0    0  .    .    .  .    .    .          leal -12(%ebp),%eax
+      1    0    0  .    .    .  1    0    0          movl %eax,84(%ebx)
+      2    0    0  0    0    0  1    0    0          movl $1,-20(%ebp)
+      .    .    .  .    .    .  .    .    .          .align 4,0x90
+      1    0    0  .    .    .  .    .    .          movl $.LnrB,%eax
+      1    0    0  .    .    .  1    0    0          movl %eax,-16(%ebp)]]></programlisting>
+
+    <para>How can the third instruction be executed twice when
+    the others are executed only once?  As it turns out, it
+    isn't.  Here's a dump of the executable, using
+    <computeroutput>objdump -d</computeroutput>:</para>
+<programlisting><![CDATA[
+      8048f25:       8d 45 f4                lea    0xfffffff4(%ebp),%eax
+      8048f28:       89 43 54                mov    %eax,0x54(%ebx)
+      8048f2b:       c7 45 ec 01 00 00 00    movl   $0x1,0xffffffec(%ebp)
+      8048f32:       89 f6                   mov    %esi,%esi
+      8048f34:       b8 08 8b 07 08          mov    $0x8078b08,%eax
+      8048f39:       89 45 f0                mov    %eax,0xfffffff0(%ebp)]]></programlisting>
+
+    <para>Notice the extra <computeroutput>mov
+    %esi,%esi</computeroutput> instruction.  Where did this come
+    from?  The GNU assembler inserted it to serve as the two
+    bytes of padding needed to align the <computeroutput>movl
+    $.LnrB,%eax</computeroutput> instruction on a four-byte
+    boundary, but pretended it didn't exist when adding debug
+    information.  Thus when Valgrind reads the debug info it
+    thinks that the <computeroutput>movl
+    $0x1,0xffffffec(%ebp)</computeroutput> instruction covers the
+    address range 0x8048f2b--0x8048f33 by itself, and attributes
+    the counts for the <computeroutput>mov
+    %esi,%esi</computeroutput> to it.</para>
+  </listitem>
+
+  <listitem>
+    <para>Inlined functions can cause strange results in the
+    function-by-function summary.  If a function
+    <computeroutput>inline_me()</computeroutput> is defined in
+    <filename>foo.h</filename> and inlined in the functions
+    <computeroutput>f1()</computeroutput>,
+    <computeroutput>f2()</computeroutput> and
+    <computeroutput>f3()</computeroutput> in
+    <filename>bar.c</filename>, there will not be a
+    <computeroutput>foo.h:inline_me()</computeroutput> function
+    entry.  Instead, there will be separate function entries for
+    each inlining site, ie.
+    <computeroutput>foo.h:f1()</computeroutput>,
+    <computeroutput>foo.h:f2()</computeroutput> and
+    <computeroutput>foo.h:f3()</computeroutput>.  To find the
+    total counts for
+    <computeroutput>foo.h:inline_me()</computeroutput>, add up
+    the counts from each entry.</para>
+
+    <para>The reason for this is that although the debug info
+    output by gcc indicates the switch from
+    <filename>bar.c</filename> to <filename>foo.h</filename>, it
+    doesn't indicate the name of the function in
+    <filename>foo.h</filename>, so Valgrind keeps using the old
+    one.</para>
+  </listitem>
+
+  <listitem>
+    <para>Sometimes, the same filename might be represented with
+    a relative name and with an absolute name in different parts
+    of the debug info, eg:
+    <filename>/home/user/proj/proj.h</filename> and
+    <filename>../proj.h</filename>.  In this case, if you use
+    auto-annotation, the file will be annotated twice with the
+    counts split between the two.</para>
+  </listitem>
+
+  <listitem>
+    <para>Files with more than 65,535 lines cause difficulties
+    for the Stabs-format debug info reader.  This is because the line
+    number in the <computeroutput>struct nlist</computeroutput>
+    defined in <filename>a.out.h</filename> under Linux is only a
+    16-bit value.  Valgrind can handle some files with more than
+    65,535 lines correctly by making some guesses to identify
+    line number overflows.  But some cases are beyond it, in
+    which case you'll get a warning message explaining that
+    annotations for the file might be incorrect.</para>
+    
+    <para>If you are using gcc 3.1 or later, this is most likely
+    irrelevant, since gcc switched to using the more modern DWARF2 
+    format by default at version 3.1.  DWARF2 does not have any such
+    limitations on line numbers.</para>
+  </listitem>
+
+  <listitem>
+    <para>If you compile some files with
+    <computeroutput>-g</computeroutput> and some without, some
+    events that take place in a file without debug info could be
+    attributed to the last line of a file with debug info
+    (whichever one gets placed before the non-debug-info file in
+    the executable).</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>This list looks long, but these cases should be fairly
+rare.</para>
+
+</sect2>
+
+
+
+<sect2 id="cg-manual.annopts.accuracy" xreflabel="Accuracy">
+<title>Accuracy</title>
+
+<para>Valgrind's cache profiling has a number of
+shortcomings:</para>
+
+<itemizedlist>
+  <listitem>
+    <para>It doesn't account for kernel activity -- the effect of
+    system calls on the cache contents is ignored.</para>
+  </listitem>
+
+  <listitem>
+    <para>It doesn't account for other process activity.
+    This is probably desirable when considering a single
+    program.</para>
+  </listitem>
+
+  <listitem>
+    <para>It doesn't account for virtual-to-physical address
+    mappings.  Hence the simulation is not a true
+    representation of what's happening in the
+    cache.  Most caches are physically indexed, but Cachegrind
+    simulates caches using virtual addresses.</para>
+  </listitem>
+
+  <listitem>
+    <para>It doesn't account for cache misses not visible at the
+    instruction level, eg. those arising from TLB misses, or
+    speculative execution.</para>
+  </listitem>
+
+  <listitem>
+    <para>Valgrind will schedule
+    threads differently from how they would be when running natively.
+    This could warp the results for threaded programs.</para>
+  </listitem>
+
+  <listitem>
+    <para>The x86/amd64 instructions <computeroutput>bts</computeroutput>,
+    <computeroutput>btr</computeroutput> and
+    <computeroutput>btc</computeroutput> will incorrectly be
+    counted as doing a data read if both the arguments are
+    registers, eg:</para>
+<programlisting><![CDATA[
+    btsl %eax, %edx]]></programlisting>
+
+    <para>This should only happen rarely.</para>
+  </listitem>
+
+  <listitem>
+    <para>x86/amd64 FPU instructions with data sizes of 28 and 108 bytes
+    (e.g.  <computeroutput>fsave</computeroutput>) are treated as
+    though they only access 16 bytes.  These instructions seem to
+    be rare so hopefully this won't affect accuracy much.</para>
+  </listitem>
+
+</itemizedlist>
+
+<para>Another thing worth noting is that results are very sensitive.
+Changing the size of the executable being profiled, or the sizes
+of any of the shared libraries it uses, or even the length of their
+file names, can perturb the results.  Variations will be small, but
+don't expect perfectly repeatable results if your program changes at
+all.</para>
+
+<para>More recent GNU/Linux distributions do address space
+randomisation, in which identical runs of the same program have their
+shared libraries loaded at different locations, as a security measure.
+This also perturbs the results.</para>
+
+<para>While these factors mean you shouldn't trust the results to
+be super-accurate, hopefully they should be close enough to be
+useful.</para>
+
+</sect2>
+
+</sect1>
+
+
+
+<sect1 id="cg-manual.cg_merge" xreflabel="cg_merge">
+<title>Merging profiles with cg_merge</title>
+
+<para>
+cg_merge is a simple program which
+reads multiple profile files, as created by cachegrind, merges them
+together, and writes the results into another file in the same format.
+You can then examine the merged results using
+<computeroutput>cg_annotate &lt;filename&gt;</computeroutput>, as
+described above.  The merging functionality might be useful if you
+want to aggregate costs over multiple runs of the same program, or
+from a single parallel run with multiple instances of the same
+program.</para>
+
+<para>
+cg_merge is invoked as follows:
+</para>
+
+<programlisting><![CDATA[
+cg_merge -o outputfile file1 file2 file3 ...]]></programlisting>
+
+<para>
+It reads and checks <computeroutput>file1</computeroutput>, then reads
+and checks <computeroutput>file2</computeroutput> and merges it into
+the running totals, then the same with
+<computeroutput>file3</computeroutput>, etc.  The final results are
+written to <computeroutput>outputfile</computeroutput>, or to standard
+out if no output file is specified.</para>
+
+<para>
+Costs are summed on a per-function, per-line and per-instruction
+basis.  Because of this, the order of the input files does not
+matter, although you should take care to only mention each file once,
+since any file mentioned twice will be added in twice.</para>
+
+<para>
+cg_merge does not attempt to check
+that the input files come from runs of the same executable.  It will
+happily merge together profile files from completely unrelated
+programs.  It does however check that the
+<computeroutput>Events:</computeroutput> lines of all the inputs are
+identical, so as to ensure that the addition of costs makes sense.
+For example, it would be nonsensical for it to add a number indicating
+D1 read references to a number from a different file indicating L2
+write misses.</para>
+
+<para>
+A number of other syntax and sanity checks are done whilst reading the
+inputs.  cg_merge will stop and
+attempt to print a helpful error message if any of the input files
+fail these checks.</para>
+
+</sect1>
+
+
+<sect1 id="cg-manual.acting-on"
+       xreflabel="Acting on Cachegrind's information">
+<title>Acting on Cachegrind's information</title>
+<para>
+So, you've managed to profile your program with Cachegrind.  Now what?
+What's the best way to actually act on the information it provides to speed
+up your program?  Here are some rules of thumb that we have found to be
+useful.</para>
+
+<para>
+First of all, the global hit/miss rate numbers are not that useful.  If you
+have multiple programs or multiple runs of a program, comparing the numbers
+might identify if any are outliers and worthy of closer investigation.
+Otherwise, they're not enough to act on.</para>
+
+<para>
+The line-by-line source code annotations are much more useful.  In our
+experience, the best place to start is by looking at the
+<computeroutput>Ir</computeroutput> numbers.  They simply measure how many
+instructions were executed for each line, and don't include any cache
+information, but they can still be very useful for identifying
+bottlenecks.</para>
+
+<para>
+After that, we have found that L2 misses are typically a much bigger source
+of slow-downs than L1 misses.  So it's worth looking for any snippets of
+code that cause a high proportion of the L2 misses.  If you find any, it's
+still not always easy to work out how to improve things.  You need to have a
+reasonable understanding of how caches work, the principles of locality, and
+your program's data access patterns.  Improving things may require
+redesigning a data structure, for example.</para>
+
+<para>
+In short, Cachegrind can tell you where some of the bottlenecks in your code
+are, but it can't tell you how to fix them.  You have to work that out for
+yourself.  But at least you have the information!
+</para>
+
+</sect1>
+
+<sect1 id="cg-manual.impl-details"
+       xreflabel="Implementation details">
+<title>Implementation details</title>
+<para>
+This section talks about details you don't need to know about in order to
+use Cachegrind, but may be of interest to some people.
+</para>
+
+<sect2 id="cg-manual.impl-details.how-cg-works"
+       xreflabel="How Cachegrind works">
+<title>How Cachegrind works</title>
+<para>The best reference for understanding how Cachegrind works is chapter 3 of
+"Dynamic Binary Analysis and Instrumentation", by Nicholas Nethercote.  It
+is available on the <ulink url="&vg-pubs;">Valgrind publications
+page</ulink>.</para>
+</sect2>
+
+<sect2 id="cg-manual.impl-details.file-format"
+       xreflabel="Cachegrind output file format">
+<title>Cachegrind output file format</title>
+<para>The file format is fairly straightforward, basically giving the
+cost centre for every line, grouped by files and
+functions.  Total counts (eg. total cache accesses, total L1
+misses) are calculated when traversing this structure rather than
+during execution, to save time; the cache simulation functions
+are called so often that even one or two extra adds can make a
+sizeable difference.</para>
+
+<para>The file format:</para>
+<programlisting><![CDATA[
+file         ::= desc_line* cmd_line events_line data_line+ summary_line
+desc_line    ::= "desc:" ws? non_nl_string
+cmd_line     ::= "cmd:" ws? cmd
+events_line  ::= "events:" ws? (event ws)+
+data_line    ::= file_line | fn_line | count_line
+file_line    ::= "fl=" filename
+fn_line      ::= "fn=" fn_name
+count_line   ::= line_num ws? (count ws)+
+summary_line ::= "summary:" ws? (count ws)+
+count        ::= num | "."]]></programlisting>
+
+<para>Where:</para>
+<itemizedlist>
+  <listitem>
+    <para><computeroutput>non_nl_string</computeroutput> is any
+    string not containing a newline.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>cmd</computeroutput> is a string holding the
+    command line of the profiled program.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>event</computeroutput> is a string containing
+    no whitespace.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>filename</computeroutput> and
+    <computeroutput>fn_name</computeroutput> are strings.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>num</computeroutput> and
+    <computeroutput>line_num</computeroutput> are decimal
+    numbers.</para>
+  </listitem>
+  <listitem>
+    <para><computeroutput>ws</computeroutput> is whitespace.</para>
+  </listitem>
+</itemizedlist>
+
+<para>The contents of the "desc:" lines are printed out at the top
+of the summary.  This is a generic way of providing simulation
+specific information, eg. for giving the cache configuration for
+cache simulation.</para>
+
+<para>More than one line of info can be presented for each file/fn/line number.
+In such cases, the counts for the named events will be accumulated.</para>
+
+<para>Counts can be "." to represent zero.  This makes the files easier for
+humans to read.</para>
+
+<para>The number of counts in each
+<computeroutput>count_line</computeroutput> and the
+<computeroutput>summary_line</computeroutput> should not exceed
+the number of events in the
+<computeroutput>events_line</computeroutput>.  If the number in
+each <computeroutput>count_line</computeroutput> is less, cg_annotate
+treats those missing as though they were a "." entry.  This saves space.
+</para>
+
+<para>A <computeroutput>file_line</computeroutput> changes the
+current file name.  A <computeroutput>fn_line</computeroutput>
+changes the current function name.  A
+<computeroutput>count_line</computeroutput> contains counts that
+pertain to the current filename/fn_name.  A
+<computeroutput>file_line</computeroutput> and a
+<computeroutput>fn_line</computeroutput> must appear before any
+<computeroutput>count_line</computeroutput>s to give the context
+of the first <computeroutput>count_line</computeroutput>s.</para>
+
+<para>Each <computeroutput>file_line</computeroutput> will normally be
+immediately followed by a <computeroutput>fn_line</computeroutput>.  But it
+doesn't have to be.</para>
+
+
+</sect2>
+
+</sect1>
+</chapter>
diff --git a/cachegrind/tests/.deps/chdir.Po b/cachegrind/tests/.deps/chdir.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/tests/.deps/chdir.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/tests/.deps/clreq.Po b/cachegrind/tests/.deps/clreq.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/tests/.deps/clreq.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/tests/.deps/dlclose.Po b/cachegrind/tests/.deps/dlclose.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/tests/.deps/dlclose.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/tests/.deps/myprint_so-myprint.Po b/cachegrind/tests/.deps/myprint_so-myprint.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/tests/.deps/myprint_so-myprint.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/tests/.svn/dir-prop-base b/cachegrind/tests/.svn/dir-prop-base
new file mode 100644
index 0000000..2839a49
--- /dev/null
+++ b/cachegrind/tests/.svn/dir-prop-base
@@ -0,0 +1,19 @@
+K 10
+svn:ignore
+V 146
+cachegrind.out
+cachegrind.out.*
+chdir
+clreq
+.deps
+dlclose
+Makefile
+Makefile.in
+*.so
+*.stderr.diff*
+*.stderr.out
+*.stdout.diff*
+*.stdout.out
+wrap5
+
+END
diff --git a/cachegrind/tests/.svn/entries b/cachegrind/tests/.svn/entries
new file mode 100644
index 0000000..b1f58af
--- /dev/null
+++ b/cachegrind/tests/.svn/entries
@@ -0,0 +1,270 @@
+8
+
+dir
+9703
+svn://svn.valgrind.org/valgrind/trunk/cachegrind/tests
+svn://svn.valgrind.org/valgrind
+
+
+
+2009-04-24T20:17:07.643509Z
+9611
+njn
+has-props
+
+svn:special svn:externals svn:needs-lock
+
+
+
+
+
+
+
+
+
+
+
+a5019735-40e9-0310-863c-91ae7b9d1cf9
+
+clreq.vgtest
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+eb9f501a898334c224ba2e31d6a06355
+2005-11-10T15:20:37.046979Z
+5072
+njn
+
+dlclose.stderr.exp
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+2f78182fb95570a369cc24cb627a182c
+2003-11-19T20:09:53.000000Z
+2045
+nethercote
+has-props
+
+myprint.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+8baaf68ef997012e279eee196843a5ae
+2002-09-23T09:36:25.000000Z
+1086
+njn25
+has-props
+
+x86
+dir
+
+chdir.stderr.exp
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+2f78182fb95570a369cc24cb627a182c
+2003-04-30T20:23:58.000000Z
+1576
+njn
+has-props
+
+clreq.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+154addc86017d940d6a7c974723e06ca
+2005-11-10T15:20:37.046979Z
+5072
+njn
+
+wrap5.stderr.exp
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+2f78182fb95570a369cc24cb627a182c
+2006-01-22T01:08:40.404884Z
+5579
+sewardj
+
+notpower2.stderr.exp
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+2f78182fb95570a369cc24cb627a182c
+2009-01-26T22:56:14.413264Z
+9080
+weidendo
+
+filter_cachesim_discards
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+cd35ee7a5cf3ac8a6bdd69002477093d
+2003-05-05T16:18:51.000000Z
+1616
+sewardj
+has-props
+
+dlclose.vgtest
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+2c8acee8b7169002a61a91ab54d0214a
+2004-11-18T11:57:00.000000Z
+3039
+nethercote
+has-props
+
+dlclose.stdout.exp
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+22e50936205f2b04f2180a65e3cb1006
+2002-09-23T09:36:25.000000Z
+1086
+njn25
+has-props
+
+clreq.stderr.exp
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+d41d8cd98f00b204e9800998ecf8427e
+2005-11-10T15:20:37.046979Z
+5072
+njn
+
+chdir.vgtest
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+b068bd1f15c1f27699b1f3f7765fa2f7
+2004-11-18T11:57:00.000000Z
+3039
+nethercote
+has-props
+
+dlclose.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+068789dd2d5d080025d628cdd0fbb192
+2002-09-23T09:36:25.000000Z
+1086
+njn25
+has-props
+
+filter_stderr
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+2b86a811f7c30a44609c9de028502cac
+2009-01-26T22:56:14.413264Z
+9080
+weidendo
+has-props
+
+Makefile.am
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+4e25ae0510d2864d092572493a4e4213
+2009-02-22T23:38:10.628912Z
+9222
+njn
+has-props
+
+wrap5.vgtest
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+c9f8944fab92373d82cc3f68689dd28e
+2009-01-08T06:07:05.490180Z
+8919
+njn
+
+chdir.c
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+5fcdeeb8ab3a5f404c90bd71a6c2971c
+2003-04-30T20:23:58.000000Z
+1576
+njn
+has-props
+
+wrap5.stdout.exp
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+726cb1c67a5a1d0841a02e58d1827cd8
+2006-01-22T01:08:40.404884Z
+5579
+sewardj
+
+notpower2.vgtest
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+263b3c970feed1c3079b7956f5bedac6
+2009-01-26T22:56:14.413264Z
+9080
+weidendo
+
diff --git a/cachegrind/tests/.svn/format b/cachegrind/tests/.svn/format
new file mode 100644
index 0000000..45a4fb7
--- /dev/null
+++ b/cachegrind/tests/.svn/format
@@ -0,0 +1 @@
+8
diff --git a/cachegrind/tests/.svn/prop-base/Makefile.am.svn-base b/cachegrind/tests/.svn/prop-base/Makefile.am.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/Makefile.am.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/chdir.c.svn-base b/cachegrind/tests/.svn/prop-base/chdir.c.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/chdir.c.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/chdir.stderr.exp.svn-base b/cachegrind/tests/.svn/prop-base/chdir.stderr.exp.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/chdir.stderr.exp.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/chdir.vgtest.svn-base b/cachegrind/tests/.svn/prop-base/chdir.vgtest.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/chdir.vgtest.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/dlclose.c.svn-base b/cachegrind/tests/.svn/prop-base/dlclose.c.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/dlclose.c.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/dlclose.stderr.exp.svn-base b/cachegrind/tests/.svn/prop-base/dlclose.stderr.exp.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/dlclose.stderr.exp.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/dlclose.stdout.exp.svn-base b/cachegrind/tests/.svn/prop-base/dlclose.stdout.exp.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/dlclose.stdout.exp.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/dlclose.vgtest.svn-base b/cachegrind/tests/.svn/prop-base/dlclose.vgtest.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/dlclose.vgtest.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/filter_cachesim_discards.svn-base b/cachegrind/tests/.svn/prop-base/filter_cachesim_discards.svn-base
new file mode 100644
index 0000000..fe7d6da
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/filter_cachesim_discards.svn-base
@@ -0,0 +1,13 @@
+K 13
+svn:eol-style
+V 6
+native
+K 14
+svn:executable
+V 1
+*
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/filter_stderr.svn-base b/cachegrind/tests/.svn/prop-base/filter_stderr.svn-base
new file mode 100644
index 0000000..fe7d6da
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/filter_stderr.svn-base
@@ -0,0 +1,13 @@
+K 13
+svn:eol-style
+V 6
+native
+K 14
+svn:executable
+V 1
+*
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/prop-base/myprint.c.svn-base b/cachegrind/tests/.svn/prop-base/myprint.c.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/.svn/prop-base/myprint.c.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/.svn/text-base/Makefile.am.svn-base b/cachegrind/tests/.svn/text-base/Makefile.am.svn-base
new file mode 100644
index 0000000..497ee19
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/Makefile.am.svn-base
@@ -0,0 +1,29 @@
+
+include $(top_srcdir)/Makefile.tool-tests.am
+
+SUBDIRS = .
+if VGCONF_ARCHS_INCLUDE_X86
+SUBDIRS += x86
+endif
+
+DIST_SUBDIRS = x86 .
+
+noinst_SCRIPTS = filter_stderr filter_cachesim_discards
+
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	chdir.vgtest chdir.stderr.exp \
+	clreq.vgtest clreq.stderr.exp \
+	dlclose.vgtest dlclose.stderr.exp dlclose.stdout.exp \
+	notpower2.vgtest notpower2.stderr.exp \
+	wrap5.vgtest wrap5.stderr.exp wrap5.stdout.exp
+
+check_PROGRAMS = \
+	chdir clreq dlclose myprint.so
+
+AM_CFLAGS   += $(AM_FLAG_M3264_PRI)
+AM_CXXFLAGS += $(AM_FLAG_M3264_PRI)
+
+# C ones
+dlclose_LDADD		= -ldl
+myprint_so_LDFLAGS	= $(AM_FLAG_M3264_PRI) -shared -fPIC
+myprint_so_CFLAGS	= $(AM_CFLAGS) -fPIC
diff --git a/cachegrind/tests/.svn/text-base/chdir.c.svn-base b/cachegrind/tests/.svn/text-base/chdir.c.svn-base
new file mode 100644
index 0000000..9b681cf
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/chdir.c.svn-base
@@ -0,0 +1,10 @@
+#include <unistd.h>
+
+// Before the bug was fixed, if a program changed working directory, things
+// would break and the cachegrind.out.<pid> file wouldn't get written.
+int main(void)
+{
+   chdir("..");
+
+   return 0;
+}
diff --git a/cachegrind/tests/.svn/text-base/chdir.stderr.exp.svn-base b/cachegrind/tests/.svn/text-base/chdir.stderr.exp.svn-base
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/chdir.stderr.exp.svn-base
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/.svn/text-base/chdir.vgtest.svn-base b/cachegrind/tests/.svn/text-base/chdir.vgtest.svn-base
new file mode 100644
index 0000000..041c5cf
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/chdir.vgtest.svn-base
@@ -0,0 +1,2 @@
+prog: chdir
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/.svn/text-base/clreq.c.svn-base b/cachegrind/tests/.svn/text-base/clreq.c.svn-base
new file mode 100644
index 0000000..0f2bc2e
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/clreq.c.svn-base
@@ -0,0 +1,11 @@
+
+// Prior to 3.0.1, Cachegrind was failing if run on a program that uses
+// client requests.  It was fixed in 3.0.1, but then reintroduced
+// afterwards (reported as bug #116057).  So here we test it.
+
+#include "../../include/valgrind.h"
+
+int main(void)
+{
+   return RUNNING_ON_VALGRIND;
+}
diff --git a/cachegrind/tests/.svn/text-base/clreq.stderr.exp.svn-base b/cachegrind/tests/.svn/text-base/clreq.stderr.exp.svn-base
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/clreq.stderr.exp.svn-base
diff --git a/cachegrind/tests/.svn/text-base/clreq.vgtest.svn-base b/cachegrind/tests/.svn/text-base/clreq.vgtest.svn-base
new file mode 100644
index 0000000..c0cf5fa
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/clreq.vgtest.svn-base
@@ -0,0 +1,3 @@
+prog: clreq
+vgopts: -q
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/.svn/text-base/dlclose.c.svn-base b/cachegrind/tests/.svn/text-base/dlclose.c.svn-base
new file mode 100644
index 0000000..9fee030
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/dlclose.c.svn-base
@@ -0,0 +1,38 @@
+/* This exercises the code that was causing this bug:
+  
+     valgrind: vg_cachesim.c:389 (get_BBCC): Assertion `((Bool)0) == remove' 
+     failed.
+
+   in Cachegrind 1.0.0 and 1.0.1, that was caused by unloading symbols before
+   invalidating translations.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+
+int main(int argc, char **argv) {
+   void *handle;
+   void (*myprint)(void);
+   char *error;
+
+   handle = dlopen ("./myprint.so", RTLD_LAZY);
+   if (!handle) {
+       fputs (dlerror(), stderr);
+       exit(1);
+   }
+
+   myprint = dlsym(handle, "myprint");
+   if ((error = dlerror()) != NULL)  {
+       fprintf (stderr, "%s\n", error);
+       exit(1);
+   }
+
+   (*myprint)();
+
+   /* Assertion failure was happening here */
+   dlclose(handle);
+
+   return 0;
+}
+
diff --git a/cachegrind/tests/.svn/text-base/dlclose.stderr.exp.svn-base b/cachegrind/tests/.svn/text-base/dlclose.stderr.exp.svn-base
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/dlclose.stderr.exp.svn-base
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/.svn/text-base/dlclose.stdout.exp.svn-base b/cachegrind/tests/.svn/text-base/dlclose.stdout.exp.svn-base
new file mode 100644
index 0000000..890082f
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/dlclose.stdout.exp.svn-base
@@ -0,0 +1 @@
+This is myprint!
diff --git a/cachegrind/tests/.svn/text-base/dlclose.vgtest.svn-base b/cachegrind/tests/.svn/text-base/dlclose.vgtest.svn-base
new file mode 100644
index 0000000..d61200f
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/dlclose.vgtest.svn-base
@@ -0,0 +1,3 @@
+prog: dlclose
+stderr_filter: filter_cachesim_discards
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/.svn/text-base/filter_cachesim_discards.svn-base b/cachegrind/tests/.svn/text-base/filter_cachesim_discards.svn-base
new file mode 100644
index 0000000..d184c4f
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/filter_cachesim_discards.svn-base
@@ -0,0 +1,8 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/filter_stderr | 
+
+# Anonymise paths like "/local/foo/bar/tests/baz/quux" (note "tests" is there)
+sed "s/\/.*\/tests\//\/...\/tests\//"
diff --git a/cachegrind/tests/.svn/text-base/filter_stderr.svn-base b/cachegrind/tests/.svn/text-base/filter_stderr.svn-base
new file mode 100644
index 0000000..a5bc1f4
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/filter_stderr.svn-base
@@ -0,0 +1,20 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/../../tests/filter_stderr_basic                |
+
+# Remove "Cachegrind, ..." line and the following copyright line.
+sed "/^Cachegrind, a cache and branch-prediction profiler./ , /./ d" |
+
+# Remove numbers from I/D/L2 "refs:" lines
+sed "s/\(\(I\|D\|L2\) *refs:\)[ 0-9,()+rdw]*$/\1/"  |
+
+# Remove numbers from I1/D1/L2/L2i/L2d "misses:" and "miss rate:" lines
+sed "s/\(\(I1\|D1\|L2\|L2i\|L2d\) *\(misses\|miss rate\):\)[ 0-9,()+rdw%\.]*$/\1/" |
+
+# Remove CPUID warnings lines for P4s and other machines
+sed "/warning: Pentium 4 with 12 KB micro-op instruction trace cache/d" |
+sed "/Simulating a 16 KB I-cache with 32 B lines/d"   |
+sed "/warning: L3 cache detected but ignored/d" |
+sed "/Warning: Cannot auto-detect cache config on PPC.., using one or more defaults/d"
diff --git a/cachegrind/tests/.svn/text-base/myprint.c.svn-base b/cachegrind/tests/.svn/text-base/myprint.c.svn-base
new file mode 100644
index 0000000..e22ae87
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/myprint.c.svn-base
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+void myprint(void)
+{
+   puts("This is myprint!");
+}
diff --git a/cachegrind/tests/.svn/text-base/notpower2.stderr.exp.svn-base b/cachegrind/tests/.svn/text-base/notpower2.stderr.exp.svn-base
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/notpower2.stderr.exp.svn-base
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/.svn/text-base/notpower2.vgtest.svn-base b/cachegrind/tests/.svn/text-base/notpower2.vgtest.svn-base
new file mode 100644
index 0000000..132cfe5
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/notpower2.vgtest.svn-base
@@ -0,0 +1,3 @@
+prog: ../../tests/true
+vgopts: --I1=32768,8,64 --D1=24576,6,64 --L2=3145728,12,64
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/.svn/text-base/wrap5.stderr.exp.svn-base b/cachegrind/tests/.svn/text-base/wrap5.stderr.exp.svn-base
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/wrap5.stderr.exp.svn-base
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/.svn/text-base/wrap5.stdout.exp.svn-base b/cachegrind/tests/.svn/text-base/wrap5.stdout.exp.svn-base
new file mode 100644
index 0000000..1924a84
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/wrap5.stdout.exp.svn-base
@@ -0,0 +1,37 @@
+computing fact1(7)
+in wrapper1-pre:  fact(7)
+in wrapper2-pre:  fact(6)
+in wrapper1-pre:  fact(5)
+in wrapper2-pre:  fact(4)
+in wrapper1-pre:  fact(3)
+in wrapper2-pre:  fact(2)
+in wrapper1-pre:  fact(1)
+in wrapper2-pre:  fact(0)
+in wrapper2-post: fact(0) = 1
+in wrapper1-post: fact(1) = 1
+in wrapper2-post: fact(2) = 2
+in wrapper1-post: fact(3) = 6
+in wrapper2-pre:  fact(2)
+in wrapper1-pre:  fact(1)
+in wrapper2-pre:  fact(0)
+in wrapper2-post: fact(0) = 1
+in wrapper1-post: fact(1) = 1
+in wrapper2-post: fact(2) = 2
+in wrapper2-post: fact(4) = 32
+in wrapper1-post: fact(5) = 160
+in wrapper2-pre:  fact(2)
+in wrapper1-pre:  fact(1)
+in wrapper2-pre:  fact(0)
+in wrapper2-post: fact(0) = 1
+in wrapper1-post: fact(1) = 1
+in wrapper2-post: fact(2) = 2
+in wrapper2-post: fact(6) = 972
+in wrapper1-post: fact(7) = 6804
+in wrapper2-pre:  fact(2)
+in wrapper1-pre:  fact(1)
+in wrapper2-pre:  fact(0)
+in wrapper2-post: fact(0) = 1
+in wrapper1-post: fact(1) = 1
+in wrapper2-post: fact(2) = 2
+fact1(7) = 6806
+allocated 51 Lards
diff --git a/cachegrind/tests/.svn/text-base/wrap5.vgtest.svn-base b/cachegrind/tests/.svn/text-base/wrap5.vgtest.svn-base
new file mode 100644
index 0000000..51a172f
--- /dev/null
+++ b/cachegrind/tests/.svn/text-base/wrap5.vgtest.svn-base
@@ -0,0 +1,2 @@
+prog: ../../memcheck/tests/wrap5
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/Makefile b/cachegrind/tests/Makefile
new file mode 100644
index 0000000..71b885b
--- /dev/null
+++ b/cachegrind/tests/Makefile
@@ -0,0 +1,726 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# cachegrind/tests/Makefile.  Generated from Makefile.in by configure.
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+
+# This file is used for tool tests, and also in perf/Makefile.am.
+
+
+pkgdatadir = $(datadir)/valgrind
+pkglibdir = $(libdir)/valgrind
+pkgincludedir = $(includedir)/valgrind
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = x86_64-unknown-linux-gnu
+host_triplet = x86_64-unknown-linux-gnu
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+	$(top_srcdir)/Makefile.flags.am \
+	$(top_srcdir)/Makefile.tool-tests.am
+#am__append_1 = x86
+check_PROGRAMS = chdir$(EXEEXT) clreq$(EXEEXT) dlclose$(EXEEXT) \
+	myprint.so$(EXEEXT)
+subdir = cachegrind/tests
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+chdir_SOURCES = chdir.c
+chdir_OBJECTS = chdir.$(OBJEXT)
+chdir_LDADD = $(LDADD)
+clreq_SOURCES = clreq.c
+clreq_OBJECTS = clreq.$(OBJEXT)
+clreq_LDADD = $(LDADD)
+dlclose_SOURCES = dlclose.c
+dlclose_OBJECTS = dlclose.$(OBJEXT)
+dlclose_DEPENDENCIES =
+myprint_so_SOURCES = myprint.c
+myprint_so_OBJECTS = myprint_so-myprint.$(OBJEXT)
+myprint_so_LDADD = $(LDADD)
+myprint_so_LINK = $(CCLD) $(myprint_so_CFLAGS) $(CFLAGS) \
+	$(myprint_so_LDFLAGS) $(LDFLAGS) -o $@
+SCRIPTS = $(noinst_SCRIPTS)
+DEFAULT_INCLUDES = -I. -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+SOURCES = chdir.c clreq.c dlclose.c myprint.c
+DIST_SOURCES = chdir.c clreq.c dlclose.c myprint.c
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+	html-recursive info-recursive install-data-recursive \
+	install-dvi-recursive install-exec-recursive \
+	install-html-recursive install-info-recursive \
+	install-pdf-recursive install-ps-recursive install-recursive \
+	installcheck-recursive installdirs-recursive pdf-recursive \
+	ps-recursive uninstall-recursive
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = ${SHELL} /home/steph/compile/valgrind/missing --run aclocal-1.10
+AMTAR = ${SHELL} /home/steph/compile/valgrind/missing --run tar
+AR = /usr/bin/ar
+AUTOCONF = ${SHELL} /home/steph/compile/valgrind/missing --run autoconf
+AUTOHEADER = ${SHELL} /home/steph/compile/valgrind/missing --run autoheader
+AUTOMAKE = ${SHELL} /home/steph/compile/valgrind/missing --run automake-1.10
+AWK = gawk
+BOOST_CFLAGS = 
+BOOST_LIBS = -lboost_thread-mt -m64
+CC = gcc
+CCAS = gcc
+CCASDEPMODE = depmode=gcc3
+CCASFLAGS = -Wno-long-long
+CCDEPMODE = depmode=gcc3
+CFLAGS = -Wno-long-long -Wno-pointer-sign -Wdeclaration-after-statement -fno-stack-protector
+CPP = gcc -E
+CPPFLAGS = 
+CXX = g++
+CXXDEPMODE = depmode=gcc3
+CXXFLAGS = -g -O2
+CYGPATH_W = echo
+DEFAULT_SUPP = exp-ptrcheck.supp xfree-3.supp xfree-4.supp glibc-2.X-drd.supp glibc-2.34567-NPTL-helgrind.supp glibc-2.X.supp 
+DEFS = -DHAVE_CONFIG_H
+DEPDIR = .deps
+DIFF = diff -u
+DISTCHECK_CONFIGURE_FLAGS = --with-vex=$(top_srcdir)/VEX
+ECHO_C = 
+ECHO_N = -n
+ECHO_T = 
+EGREP = /bin/grep -E
+EXEEXT = 
+FLAG_FNO_STACK_PROTECTOR = -fno-stack-protector
+FLAG_M32 = -m32
+FLAG_M64 = -m64
+FLAG_MAIX32 = 
+FLAG_MAIX64 = 
+FLAG_MMMX = -mmmx
+FLAG_MSSE = -msse
+FLAG_UNLIMITED_INLINE_UNIT_GROWTH = --param inline-unit-growth=900
+FLAG_WDECL_AFTER_STMT = -Wdeclaration-after-statement
+FLAG_W_EXTRA = -Wextra
+FLAG_W_NO_FORMAT_ZERO_LENGTH = -Wno-format-zero-length
+GDB = /usr/bin/gdb
+GLIBC_VERSION = 2.8
+GREP = /bin/grep
+INSTALL = /usr/bin/install -c
+INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_SCRIPT = ${INSTALL}
+INSTALL_STRIP_PROGRAM = $(install_sh) -c -s
+LDFLAGS = 
+LIBOBJS = 
+LIBS = 
+LN_S = ln -s
+LTLIBOBJS = 
+MAINT = #
+MAKEINFO = ${SHELL} /home/steph/compile/valgrind/missing --run makeinfo
+MKDIR_P = /bin/mkdir -p
+MPI_CC = mpicc
+OBJEXT = o
+PACKAGE = valgrind
+PACKAGE_BUGREPORT = valgrind-users@lists.sourceforge.net
+PACKAGE_NAME = Valgrind
+PACKAGE_STRING = Valgrind 3.5.0.SVN
+PACKAGE_TARNAME = valgrind
+PACKAGE_VERSION = 3.5.0.SVN
+PATH_SEPARATOR = :
+PERL = /usr/bin/perl
+PKG_CONFIG = /usr/bin/pkg-config
+PREFERRED_STACK_BOUNDARY = 
+QTCORE_CFLAGS = -DQT_SHARED -I/usr/include/QtCore  
+QTCORE_LIBS = -lQtCore  
+RANLIB = ranlib
+SET_MAKE = 
+SHELL = /bin/sh
+STRIP = 
+VALT_LOAD_ADDRESS = 0x38000000
+VERSION = 3.5.0.SVN
+VEX_DIR = $(top_srcdir)/VEX
+VGCONF_ARCH_PRI = amd64
+VGCONF_OS = linux
+VGCONF_PLATFORM_PRI_CAPS = AMD64_LINUX
+VGCONF_PLATFORM_SEC_CAPS = 
+abs_builddir = /home/steph/compile/valgrind/cachegrind/tests
+abs_srcdir = /home/steph/compile/valgrind/cachegrind/tests
+abs_top_builddir = /home/steph/compile/valgrind
+abs_top_srcdir = /home/steph/compile/valgrind
+ac_ct_CC = gcc
+ac_ct_CXX = g++
+am__include = include
+am__leading_dot = .
+am__quote = 
+am__tar = ${AMTAR} chof - "$$tardir"
+am__untar = ${AMTAR} xf -
+bindir = ${exec_prefix}/bin
+build = x86_64-unknown-linux-gnu
+build_alias = 
+build_cpu = x86_64
+build_os = linux-gnu
+build_vendor = unknown
+builddir = .
+datadir = ${datarootdir}
+datarootdir = ${prefix}/share
+docdir = ${datarootdir}/doc/${PACKAGE_TARNAME}
+dvidir = ${docdir}
+exec_prefix = ${prefix}
+host = x86_64-unknown-linux-gnu
+host_alias = 
+host_cpu = x86_64
+host_os = linux-gnu
+host_vendor = unknown
+htmldir = ${docdir}
+includedir = ${prefix}/include
+infodir = ${datarootdir}/info
+install_sh = $(SHELL) /home/steph/compile/valgrind/install-sh
+libdir = ${exec_prefix}/lib
+libexecdir = ${exec_prefix}/libexec
+localedir = ${datarootdir}/locale
+localstatedir = ${prefix}/var
+mandir = ${datarootdir}/man
+mkdir_p = /bin/mkdir -p
+oldincludedir = /usr/include
+pdfdir = ${docdir}
+prefix = /usr/local
+program_transform_name = s,x,x,
+psdir = ${docdir}
+sbindir = ${exec_prefix}/sbin
+sharedstatedir = ${prefix}/com
+srcdir = .
+sysconfdir = ${prefix}/etc
+target_alias = 
+top_builddir = ../..
+top_srcdir = ../..
+
+# Baseline flags for all compilations.  Aim here is to maximise
+# performance and get whatever useful warnings we can out of gcc.
+AM_CFLAGS_BASE = -O2 -g -Wmissing-prototypes -Wall -Wshadow \
+                 -Wpointer-arith -Wstrict-prototypes -Wmissing-declarations \
+		 -Wno-format-zero-length \
+                 -fno-strict-aliasing
+
+
+# These flags are used for building the preload shared objects.
+# The aim is to give reasonable performance but also to have good
+# stack traces, since users often see stack traces extending 
+# into (and through) the preloads.
+AM_CFLAGS_PIC = -O -g -fpic -fno-omit-frame-pointer -fno-strict-aliasing
+
+# Flags for specific targets.
+#
+# Nb: the AM_CPPFLAGS_* values are suitable for building tools and auxprogs.
+# For building the core, coregrind/Makefile.am files add some extra things.
+#
+# Also: in newer versions of automake (1.10 onwards?) asm files ending with
+# '.S' are considered "pre-processed" (as opposed to those ending in '.s')
+# and so the CPPFLAGS are passed to the assembler.  But this is not true for
+# older automakes (e.g. 1.8.5, 1.9.6), sigh.  So we include
+# AM_CPPFLAGS_<PLATFORM> in each AM_CCASFLAGS_<PLATFORM> variable.  This
+# means some of the flags are duplicated on systems with newer versions of
+# automake, but this does not really matter and seems hard to avoid.
+AM_CPPFLAGS_COMMON = \
+		-I$(top_srcdir) \
+		-I$(top_srcdir)/include \
+		-I$(top_srcdir)/VEX/pub
+
+AM_FLAG_M3264_X86_LINUX = -m32
+AM_CPPFLAGS_X86_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_x86=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_x86_linux=1
+
+AM_CFLAGS_X86_LINUX = -m32  \
+			 	$(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_X86_LINUX = $(AM_CPPFLAGS_X86_LINUX) -m32 -g
+AM_FLAG_M3264_AMD64_LINUX = -m64
+AM_CPPFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_amd64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_amd64_linux=1
+
+AM_CFLAGS_AMD64_LINUX = -m64 -fomit-frame-pointer \
+				 $(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_AMD64_LINUX) -m64 -g
+AM_FLAG_M3264_PPC32_LINUX = -m32
+AM_CPPFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_COMMON) \
+		-DVGA_ppc32=1 \
+		-DVGO_linux=1 \
+		-DVGP_ppc32_linux=1
+
+AM_CFLAGS_PPC32_LINUX = -m32 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_PPC32_LINUX) -m32 -g
+AM_FLAG_M3264_PPC64_LINUX = -m64
+AM_CPPFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_ppc64_linux=1
+
+AM_CFLAGS_PPC64_LINUX = -m64 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_PPC64_LINUX) -m64 -g
+AM_FLAG_M3264_PPC32_AIX5 = 
+AM_CPPFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc32=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc32_aix5=1
+
+AM_CFLAGS_PPC32_AIX5 =  -mcpu=powerpc $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_PPC32_AIX5) \
+			     -mcpu=powerpc -g
+
+AM_FLAG_M3264_PPC64_AIX5 = 
+AM_CPPFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc64_aix5=1
+
+AM_CFLAGS_PPC64_AIX5 =  -mcpu=powerpc64 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_PPC64_AIX5) \
+			     -mcpu=powerpc64 -g
+
+
+# Flags for the primary target.  These must be used to build the
+# regtests and performance tests.  In fact, these must be used to
+# build anything which is built only once on a dual-arch build.
+#
+AM_FLAG_M3264_PRI = $(AM_FLAG_M3264_AMD64_LINUX)
+AM_CPPFLAGS_PRI = $(AM_CPPFLAGS_AMD64_LINUX)
+AM_CFLAGS_PRI = $(AM_CFLAGS_AMD64_LINUX)
+AM_CCASFLAGS_PRI = $(AM_CCASFLAGS_AMD64_LINUX)
+AM_FLAG_M3264_SEC = 
+#AM_FLAG_M3264_SEC = $(AM_FLAG_M3264_)
+
+# Baseline link flags for making dynamic shared objects.
+#
+PRELOAD_LDFLAGS_COMMON_LINUX = -nodefaultlibs -shared -Wl,-z,interpose,-z,initfirst
+PRELOAD_LDFLAGS_COMMON_AIX5 = -nodefaultlibs -shared -Wl,-G -Wl,-bnogc
+PRELOAD_LDFLAGS_X86_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m32
+PRELOAD_LDFLAGS_AMD64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m64
+PRELOAD_LDFLAGS_PPC32_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m32
+PRELOAD_LDFLAGS_PPC64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m64
+PRELOAD_LDFLAGS_PPC32_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  
+PRELOAD_LDFLAGS_PPC64_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/include \
+		-I$(top_srcdir)/coregrind -I$(top_builddir)/include \
+		-I$(top_srcdir)/VEX/pub \
+		-DVGA_$(VGCONF_ARCH_PRI)=1 \
+		-DVGO_$(VGCONF_OS)=1 \
+		-DVGP_$(VGCONF_ARCH_PRI)_$(VGCONF_OS)=1
+
+# Nb: Tools need to augment these flags with an arch-selection option, such
+# as $(AM_FLAG_M3264_PRI).
+AM_CFLAGS = -Winline -Wall -Wshadow -g $(AM_FLAG_M3264_PRI)
+AM_CXXFLAGS = -Winline -Wall -Wshadow -g $(AM_FLAG_M3264_PRI)
+# Include AM_CPPFLAGS in AM_CCASFLAGS to allow for older versions of
+# automake;  see comments in Makefile.flags.am for more detail.
+AM_CCASFLAGS = $(AM_CPPFLAGS)
+SUBDIRS = . $(am__append_1)
+DIST_SUBDIRS = x86 .
+noinst_SCRIPTS = filter_stderr filter_cachesim_discards
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	chdir.vgtest chdir.stderr.exp \
+	clreq.vgtest clreq.stderr.exp \
+	dlclose.vgtest dlclose.stderr.exp dlclose.stdout.exp \
+	notpower2.vgtest notpower2.stderr.exp \
+	wrap5.vgtest wrap5.stderr.exp wrap5.stdout.exp
+
+
+# C ones
+dlclose_LDADD = -ldl
+myprint_so_LDFLAGS = $(AM_FLAG_M3264_PRI) -shared -fPIC
+myprint_so_CFLAGS = $(AM_CFLAGS) -fPIC
+all: all-recursive
+
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(top_srcdir)/Makefile.tool-tests.am $(top_srcdir)/Makefile.flags.am $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  cachegrind/tests/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  cachegrind/tests/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: # $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): # $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+clean-checkPROGRAMS:
+	-test -z "$(check_PROGRAMS)" || rm -f $(check_PROGRAMS)
+chdir$(EXEEXT): $(chdir_OBJECTS) $(chdir_DEPENDENCIES) 
+	@rm -f chdir$(EXEEXT)
+	$(LINK) $(chdir_OBJECTS) $(chdir_LDADD) $(LIBS)
+clreq$(EXEEXT): $(clreq_OBJECTS) $(clreq_DEPENDENCIES) 
+	@rm -f clreq$(EXEEXT)
+	$(LINK) $(clreq_OBJECTS) $(clreq_LDADD) $(LIBS)
+dlclose$(EXEEXT): $(dlclose_OBJECTS) $(dlclose_DEPENDENCIES) 
+	@rm -f dlclose$(EXEEXT)
+	$(LINK) $(dlclose_OBJECTS) $(dlclose_LDADD) $(LIBS)
+myprint.so$(EXEEXT): $(myprint_so_OBJECTS) $(myprint_so_DEPENDENCIES) 
+	@rm -f myprint.so$(EXEEXT)
+	$(myprint_so_LINK) $(myprint_so_OBJECTS) $(myprint_so_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+include ./$(DEPDIR)/chdir.Po
+include ./$(DEPDIR)/clreq.Po
+include ./$(DEPDIR)/dlclose.Po
+include ./$(DEPDIR)/myprint_so-myprint.Po
+
+.c.o:
+	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+#	source='$<' object='$@' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(COMPILE) -c $<
+
+.c.obj:
+	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+#	source='$<' object='$@' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+myprint_so-myprint.o: myprint.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(myprint_so_CFLAGS) $(CFLAGS) -MT myprint_so-myprint.o -MD -MP -MF $(DEPDIR)/myprint_so-myprint.Tpo -c -o myprint_so-myprint.o `test -f 'myprint.c' || echo '$(srcdir)/'`myprint.c
+	mv -f $(DEPDIR)/myprint_so-myprint.Tpo $(DEPDIR)/myprint_so-myprint.Po
+#	source='myprint.c' object='myprint_so-myprint.o' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(myprint_so_CFLAGS) $(CFLAGS) -c -o myprint_so-myprint.o `test -f 'myprint.c' || echo '$(srcdir)/'`myprint.c
+
+myprint_so-myprint.obj: myprint.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(myprint_so_CFLAGS) $(CFLAGS) -MT myprint_so-myprint.obj -MD -MP -MF $(DEPDIR)/myprint_so-myprint.Tpo -c -o myprint_so-myprint.obj `if test -f 'myprint.c'; then $(CYGPATH_W) 'myprint.c'; else $(CYGPATH_W) '$(srcdir)/myprint.c'; fi`
+	mv -f $(DEPDIR)/myprint_so-myprint.Tpo $(DEPDIR)/myprint_so-myprint.Po
+#	source='myprint.c' object='myprint_so-myprint.obj' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
+#	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(myprint_so_CFLAGS) $(CFLAGS) -c -o myprint_so-myprint.obj `if test -f 'myprint.c'; then $(CYGPATH_W) 'myprint.c'; else $(CYGPATH_W) '$(srcdir)/myprint.c'; fi`
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+#     (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+	@failcom='exit 1'; \
+	for f in x $$MAKEFLAGS; do \
+	  case $$f in \
+	    *=* | --[!k]*);; \
+	    *k*) failcom='fail=yes';; \
+	  esac; \
+	done; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):
+	@failcom='exit 1'; \
+	for f in x $$MAKEFLAGS; do \
+	  case $$f in \
+	    *=* | --[!k]*);; \
+	    *k*) failcom='fail=yes';; \
+	  esac; \
+	done; \
+	dot_seen=no; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	rev=''; for subdir in $$list; do \
+	  if test "$$subdir" = "."; then :; else \
+	    rev="$$subdir $$rev"; \
+	  fi; \
+	done; \
+	rev="$$rev ."; \
+	target=`echo $@ | sed s/-recursive//`; \
+	for subdir in $$rev; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done && test -z "$$fail"
+tags-recursive:
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+	done
+ctags-recursive:
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+	done
+# ID: build the mkid database from the de-duplicated source/header list; the awk flag must be spelled "nonempty" to match the END test, as in TAGS/CTAGS.
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+	list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test -d "$(distdir)/$$subdir" \
+	    || $(MKDIR_P) "$(distdir)/$$subdir" \
+	    || exit 1; \
+	    distdir=`$(am__cd) $(distdir) && pwd`; \
+	    top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
+	    (cd $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$top_distdir" \
+	        distdir="$$distdir/$$subdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+	$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+check: check-recursive
+all-am: Makefile $(SCRIPTS)
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-checkPROGRAMS clean-generic mostlyclean-am
+
+distclean: distclean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-info: install-info-recursive
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-ps: install-ps-recursive
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
+	install-strip
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+	all all-am check check-am clean clean-checkPROGRAMS \
+	clean-generic ctags ctags-recursive distclean \
+	distclean-compile distclean-generic distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs installdirs-am \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \
+	tags tags-recursive uninstall uninstall-am
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/cachegrind/tests/Makefile.am b/cachegrind/tests/Makefile.am
new file mode 100644
index 0000000..497ee19
--- /dev/null
+++ b/cachegrind/tests/Makefile.am
@@ -0,0 +1,29 @@
+## Build rules for the cachegrind regression tests; shared test flags come from the include below.
+include $(top_srcdir)/Makefile.tool-tests.am
+## Always build this directory; descend into x86/ only when x86 is a configured arch.
+SUBDIRS = .
+if VGCONF_ARCHS_INCLUDE_X86
+SUBDIRS += x86
+endif
+## x86/ is listed unconditionally here so it is distributed even when it is not built.
+DIST_SUBDIRS = x86 .
+
+noinst_SCRIPTS = filter_stderr filter_cachesim_discards
+
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	chdir.vgtest chdir.stderr.exp \
+	clreq.vgtest clreq.stderr.exp \
+	dlclose.vgtest dlclose.stderr.exp dlclose.stdout.exp \
+	notpower2.vgtest notpower2.stderr.exp \
+	wrap5.vgtest wrap5.stderr.exp wrap5.stdout.exp
+## Test binaries built by "make check" only; myprint.so is linked with -shared (see its flags below).
+check_PROGRAMS = \
+	chdir clreq dlclose myprint.so
+## Append the primary architecture's word-size selector (e.g. -m32/-m64, may be empty) to the test flags.
+AM_CFLAGS   += $(AM_FLAG_M3264_PRI)
+AM_CXXFLAGS += $(AM_FLAG_M3264_PRI)
+
+# C ones
+dlclose_LDADD		= -ldl
+myprint_so_LDFLAGS	= $(AM_FLAG_M3264_PRI) -shared -fPIC
+myprint_so_CFLAGS	= $(AM_CFLAGS) -fPIC
diff --git a/cachegrind/tests/Makefile.in b/cachegrind/tests/Makefile.in
new file mode 100644
index 0000000..597143e
--- /dev/null
+++ b/cachegrind/tests/Makefile.in
@@ -0,0 +1,726 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# This file is used for tool tests, and also in perf/Makefile.am.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+	$(top_srcdir)/Makefile.flags.am \
+	$(top_srcdir)/Makefile.tool-tests.am
+@VGCONF_ARCHS_INCLUDE_X86_TRUE@am__append_1 = x86
+check_PROGRAMS = chdir$(EXEEXT) clreq$(EXEEXT) dlclose$(EXEEXT) \
+	myprint.so$(EXEEXT)
+subdir = cachegrind/tests
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+chdir_SOURCES = chdir.c
+chdir_OBJECTS = chdir.$(OBJEXT)
+chdir_LDADD = $(LDADD)
+clreq_SOURCES = clreq.c
+clreq_OBJECTS = clreq.$(OBJEXT)
+clreq_LDADD = $(LDADD)
+dlclose_SOURCES = dlclose.c
+dlclose_OBJECTS = dlclose.$(OBJEXT)
+dlclose_DEPENDENCIES =
+myprint_so_SOURCES = myprint.c
+myprint_so_OBJECTS = myprint_so-myprint.$(OBJEXT)
+myprint_so_LDADD = $(LDADD)
+myprint_so_LINK = $(CCLD) $(myprint_so_CFLAGS) $(CFLAGS) \
+	$(myprint_so_LDFLAGS) $(LDFLAGS) -o $@
+SCRIPTS = $(noinst_SCRIPTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+SOURCES = chdir.c clreq.c dlclose.c myprint.c
+DIST_SOURCES = chdir.c clreq.c dlclose.c myprint.c
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+	html-recursive info-recursive install-data-recursive \
+	install-dvi-recursive install-exec-recursive \
+	install-html-recursive install-info-recursive \
+	install-pdf-recursive install-ps-recursive install-recursive \
+	installcheck-recursive installdirs-recursive pdf-recursive \
+	ps-recursive uninstall-recursive
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CFLAGS = @BOOST_CFLAGS@
+BOOST_LIBS = @BOOST_LIBS@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFAULT_SUPP = @DEFAULT_SUPP@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DIFF = @DIFF@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FLAG_FNO_STACK_PROTECTOR = @FLAG_FNO_STACK_PROTECTOR@
+FLAG_M32 = @FLAG_M32@
+FLAG_M64 = @FLAG_M64@
+FLAG_MAIX32 = @FLAG_MAIX32@
+FLAG_MAIX64 = @FLAG_MAIX64@
+FLAG_MMMX = @FLAG_MMMX@
+FLAG_MSSE = @FLAG_MSSE@
+FLAG_UNLIMITED_INLINE_UNIT_GROWTH = @FLAG_UNLIMITED_INLINE_UNIT_GROWTH@
+FLAG_WDECL_AFTER_STMT = @FLAG_WDECL_AFTER_STMT@
+FLAG_W_EXTRA = @FLAG_W_EXTRA@
+FLAG_W_NO_FORMAT_ZERO_LENGTH = @FLAG_W_NO_FORMAT_ZERO_LENGTH@
+GDB = @GDB@
+GLIBC_VERSION = @GLIBC_VERSION@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPI_CC = @MPI_CC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+PKG_CONFIG = @PKG_CONFIG@
+PREFERRED_STACK_BOUNDARY = @PREFERRED_STACK_BOUNDARY@
+QTCORE_CFLAGS = @QTCORE_CFLAGS@
+QTCORE_LIBS = @QTCORE_LIBS@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VALT_LOAD_ADDRESS = @VALT_LOAD_ADDRESS@
+VERSION = @VERSION@
+VEX_DIR = @VEX_DIR@
+VGCONF_ARCH_PRI = @VGCONF_ARCH_PRI@
+VGCONF_OS = @VGCONF_OS@
+VGCONF_PLATFORM_PRI_CAPS = @VGCONF_PLATFORM_PRI_CAPS@
+VGCONF_PLATFORM_SEC_CAPS = @VGCONF_PLATFORM_SEC_CAPS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Baseline flags for all compilations.  Aim here is to maximise
+# performance and get whatever useful warnings we can out of gcc.
+AM_CFLAGS_BASE = -O2 -g -Wmissing-prototypes -Wall -Wshadow \
+                 -Wpointer-arith -Wstrict-prototypes -Wmissing-declarations \
+		 @FLAG_W_NO_FORMAT_ZERO_LENGTH@ \
+                 -fno-strict-aliasing
+
+
+# These flags are used for building the preload shared objects.
+# The aim is to give reasonable performance but also to have good
+# stack traces, since users often see stack traces extending 
+# into (and through) the preloads.
+AM_CFLAGS_PIC = -O -g -fpic -fno-omit-frame-pointer -fno-strict-aliasing
+
+# Flags for specific targets.
+#
+# Nb: the AM_CPPFLAGS_* values are suitable for building tools and auxprogs.
+# For building the core, coregrind/Makefile.am files add some extra things.
+#
+# Also: in newer versions of automake (1.10 onwards?) asm files ending with
+# '.S' are considered "pre-processed" (as opposed to those ending in '.s')
+# and so the CPPFLAGS are passed to the assembler.  But this is not true for
+# older automakes (e.g. 1.8.5, 1.9.6), sigh.  So we include
+# AM_CPPFLAGS_<PLATFORM> in each AM_CCASFLAGS_<PLATFORM> variable.  This
+# means some of the flags are duplicated on systems with newer versions of
+# automake, but this does not really matter and seems hard to avoid.
+AM_CPPFLAGS_COMMON = \
+		-I$(top_srcdir) \
+		-I$(top_srcdir)/include \
+		-I@VEX_DIR@/pub
+
+AM_FLAG_M3264_X86_LINUX = @FLAG_M32@
+AM_CPPFLAGS_X86_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_x86=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_x86_linux=1
+
+AM_CFLAGS_X86_LINUX = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY@ \
+			 	$(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_X86_LINUX = $(AM_CPPFLAGS_X86_LINUX) @FLAG_M32@ -g
+AM_FLAG_M3264_AMD64_LINUX = @FLAG_M64@
+AM_CPPFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_amd64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_amd64_linux=1
+
+AM_CFLAGS_AMD64_LINUX = @FLAG_M64@ -fomit-frame-pointer \
+				@PREFERRED_STACK_BOUNDARY@ $(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_AMD64_LINUX) @FLAG_M64@ -g
+AM_FLAG_M3264_PPC32_LINUX = @FLAG_M32@
+AM_CPPFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_COMMON) \
+		-DVGA_ppc32=1 \
+		-DVGO_linux=1 \
+		-DVGP_ppc32_linux=1
+
+AM_CFLAGS_PPC32_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_PPC32_LINUX) @FLAG_M32@ -g
+AM_FLAG_M3264_PPC64_LINUX = @FLAG_M64@
+AM_CPPFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_ppc64_linux=1
+
+AM_CFLAGS_PPC64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_PPC64_LINUX) @FLAG_M64@ -g
+AM_FLAG_M3264_PPC32_AIX5 = @FLAG_MAIX32@
+AM_CPPFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc32=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc32_aix5=1
+
+AM_CFLAGS_PPC32_AIX5 = @FLAG_MAIX32@ -mcpu=powerpc $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_PPC32_AIX5) \
+			    @FLAG_MAIX32@ -mcpu=powerpc -g
+
+AM_FLAG_M3264_PPC64_AIX5 = @FLAG_MAIX64@
+AM_CPPFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc64_aix5=1
+
+AM_CFLAGS_PPC64_AIX5 = @FLAG_MAIX64@ -mcpu=powerpc64 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_PPC64_AIX5) \
+			    @FLAG_MAIX64@ -mcpu=powerpc64 -g
+
+
+# Flags for the primary target.  These must be used to build the
+# regtests and performance tests.  In fact, these must be used to
+# build anything which is built only once on a dual-arch build.
+#
+AM_FLAG_M3264_PRI = $(AM_FLAG_M3264_@VGCONF_PLATFORM_PRI_CAPS@)
+AM_CPPFLAGS_PRI = $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+AM_CFLAGS_PRI = $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+AM_CCASFLAGS_PRI = $(AM_CCASFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+@VGCONF_HAVE_PLATFORM_SEC_CAPS_FALSE@AM_FLAG_M3264_SEC = 
+@VGCONF_HAVE_PLATFORM_SEC_CAPS_TRUE@AM_FLAG_M3264_SEC = $(AM_FLAG_M3264_@VGCONF_PLATFORM_SEC_CAPS@)
+
+# Baseline link flags for making dynamic shared objects.
+#
+PRELOAD_LDFLAGS_COMMON_LINUX = -nodefaultlibs -shared -Wl,-z,interpose,-z,initfirst
+PRELOAD_LDFLAGS_COMMON_AIX5 = -nodefaultlibs -shared -Wl,-G -Wl,-bnogc
+PRELOAD_LDFLAGS_X86_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
+PRELOAD_LDFLAGS_AMD64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
+PRELOAD_LDFLAGS_PPC32_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
+PRELOAD_LDFLAGS_PPC64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
+PRELOAD_LDFLAGS_PPC32_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  @FLAG_MAIX32@
+PRELOAD_LDFLAGS_PPC64_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  @FLAG_MAIX64@
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/include \
+		-I$(top_srcdir)/coregrind -I$(top_builddir)/include \
+		-I@VEX_DIR@/pub \
+		-DVGA_$(VGCONF_ARCH_PRI)=1 \
+		-DVGO_$(VGCONF_OS)=1 \
+		-DVGP_$(VGCONF_ARCH_PRI)_$(VGCONF_OS)=1
+
+# Nb: Tools need to augment these flags with an arch-selection option, such
+# as $(AM_FLAG_M3264_PRI).
+AM_CFLAGS = -Winline -Wall -Wshadow -g $(AM_FLAG_M3264_PRI)
+AM_CXXFLAGS = -Winline -Wall -Wshadow -g $(AM_FLAG_M3264_PRI)
+# Include AM_CPPFLAGS in AM_CCASFLAGS to allow for older versions of
+# automake;  see comments in Makefile.flags.am for more detail.
+AM_CCASFLAGS = $(AM_CPPFLAGS)
+SUBDIRS = . $(am__append_1)
+DIST_SUBDIRS = x86 .
+noinst_SCRIPTS = filter_stderr filter_cachesim_discards
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	chdir.vgtest chdir.stderr.exp \
+	clreq.vgtest clreq.stderr.exp \
+	dlclose.vgtest dlclose.stderr.exp dlclose.stdout.exp \
+	notpower2.vgtest notpower2.stderr.exp \
+	wrap5.vgtest wrap5.stderr.exp wrap5.stdout.exp
+
+
+# C ones
+dlclose_LDADD = -ldl
+myprint_so_LDFLAGS = $(AM_FLAG_M3264_PRI) -shared -fPIC
+myprint_so_CFLAGS = $(AM_CFLAGS) -fPIC
+all: all-recursive
+
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/Makefile.tool-tests.am $(top_srcdir)/Makefile.flags.am $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  cachegrind/tests/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  cachegrind/tests/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+clean-checkPROGRAMS:
+	-test -z "$(check_PROGRAMS)" || rm -f $(check_PROGRAMS)
+chdir$(EXEEXT): $(chdir_OBJECTS) $(chdir_DEPENDENCIES) 
+	@rm -f chdir$(EXEEXT)
+	$(LINK) $(chdir_OBJECTS) $(chdir_LDADD) $(LIBS)
+clreq$(EXEEXT): $(clreq_OBJECTS) $(clreq_DEPENDENCIES) 
+	@rm -f clreq$(EXEEXT)
+	$(LINK) $(clreq_OBJECTS) $(clreq_LDADD) $(LIBS)
+dlclose$(EXEEXT): $(dlclose_OBJECTS) $(dlclose_DEPENDENCIES) 
+	@rm -f dlclose$(EXEEXT)
+	$(LINK) $(dlclose_OBJECTS) $(dlclose_LDADD) $(LIBS)
+myprint.so$(EXEEXT): $(myprint_so_OBJECTS) $(myprint_so_DEPENDENCIES) 
+	@rm -f myprint.so$(EXEEXT)
+	$(myprint_so_LINK) $(myprint_so_OBJECTS) $(myprint_so_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chdir.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clreq.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dlclose.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/myprint_so-myprint.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+myprint_so-myprint.o: myprint.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(myprint_so_CFLAGS) $(CFLAGS) -MT myprint_so-myprint.o -MD -MP -MF $(DEPDIR)/myprint_so-myprint.Tpo -c -o myprint_so-myprint.o `test -f 'myprint.c' || echo '$(srcdir)/'`myprint.c
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/myprint_so-myprint.Tpo $(DEPDIR)/myprint_so-myprint.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='myprint.c' object='myprint_so-myprint.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(myprint_so_CFLAGS) $(CFLAGS) -c -o myprint_so-myprint.o `test -f 'myprint.c' || echo '$(srcdir)/'`myprint.c
+
+myprint_so-myprint.obj: myprint.c
+@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(myprint_so_CFLAGS) $(CFLAGS) -MT myprint_so-myprint.obj -MD -MP -MF $(DEPDIR)/myprint_so-myprint.Tpo -c -o myprint_so-myprint.obj `if test -f 'myprint.c'; then $(CYGPATH_W) 'myprint.c'; else $(CYGPATH_W) '$(srcdir)/myprint.c'; fi`
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/myprint_so-myprint.Tpo $(DEPDIR)/myprint_so-myprint.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='myprint.c' object='myprint_so-myprint.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(myprint_so_CFLAGS) $(CFLAGS) -c -o myprint_so-myprint.obj `if test -f 'myprint.c'; then $(CYGPATH_W) 'myprint.c'; else $(CYGPATH_W) '$(srcdir)/myprint.c'; fi`
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+#     (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+	@failcom='exit 1'; \
+	for f in x $$MAKEFLAGS; do \
+	  case $$f in \
+	    *=* | --[!k]*);; \
+	    *k*) failcom='fail=yes';; \
+	  esac; \
+	done; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):
+	@failcom='exit 1'; \
+	for f in x $$MAKEFLAGS; do \
+	  case $$f in \
+	    *=* | --[!k]*);; \
+	    *k*) failcom='fail=yes';; \
+	  esac; \
+	done; \
+	dot_seen=no; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	rev=''; for subdir in $$list; do \
+	  if test "$$subdir" = "."; then :; else \
+	    rev="$$subdir $$rev"; \
+	  fi; \
+	done; \
+	rev="$$rev ."; \
+	target=`echo $@ | sed s/-recursive//`; \
+	for subdir in $$rev; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done && test -z "$$fail"
+tags-recursive:
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+	done
+ctags-recursive:
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+	done
+# ID: build an mkid(1) identifier database from the de-duplicated source list.
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+	list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test -d "$(distdir)/$$subdir" \
+	    || $(MKDIR_P) "$(distdir)/$$subdir" \
+	    || exit 1; \
+	    distdir=`$(am__cd) $(distdir) && pwd`; \
+	    top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
+	    (cd $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$top_distdir" \
+	        distdir="$$distdir/$$subdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+	$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+check: check-recursive
+all-am: Makefile $(SCRIPTS)
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-checkPROGRAMS clean-generic mostlyclean-am
+
+distclean: distclean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-info: install-info-recursive
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-ps: install-ps-recursive
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
+	install-strip
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+	all all-am check check-am clean clean-checkPROGRAMS \
+	clean-generic ctags ctags-recursive distclean \
+	distclean-compile distclean-generic distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs installdirs-am \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \
+	tags tags-recursive uninstall uninstall-am
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/cachegrind/tests/chdir.c b/cachegrind/tests/chdir.c
new file mode 100644
index 0000000..9b681cf
--- /dev/null
+++ b/cachegrind/tests/chdir.c
@@ -0,0 +1,10 @@
+#include <unistd.h>
+
+// Before the bug was fixed, if a program changed working directory, things
+// would break and the cachegrind.out.<pid> file wouldn't get written.
+int main(void)
+{
+   // Exit 0 if the directory change worked, 1 if chdir() failed (its result
+   // used to be ignored, hiding a run that never really changed directory).
+   return chdir("..") != 0;
+}
diff --git a/cachegrind/tests/chdir.stderr.exp b/cachegrind/tests/chdir.stderr.exp
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/chdir.stderr.exp
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/chdir.vgtest b/cachegrind/tests/chdir.vgtest
new file mode 100644
index 0000000..041c5cf
--- /dev/null
+++ b/cachegrind/tests/chdir.vgtest
@@ -0,0 +1,2 @@
+prog: chdir
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/clreq.c b/cachegrind/tests/clreq.c
new file mode 100644
index 0000000..0f2bc2e
--- /dev/null
+++ b/cachegrind/tests/clreq.c
@@ -0,0 +1,11 @@
+
+// Prior to 3.0.1, Cachegrind was failing if run on a program that uses
+// client requests.  It was fixed in 3.0.1, but then reintroduced
+// afterwards (reported as bug #116057).  So here we test it.
+
+#include "../../include/valgrind.h"
+
+int main(void)
+{
+   return RUNNING_ON_VALGRIND;   // the client request being exercised; its value is the exit status
+}
diff --git a/cachegrind/tests/clreq.stderr.exp b/cachegrind/tests/clreq.stderr.exp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/cachegrind/tests/clreq.stderr.exp
diff --git a/cachegrind/tests/clreq.vgtest b/cachegrind/tests/clreq.vgtest
new file mode 100644
index 0000000..c0cf5fa
--- /dev/null
+++ b/cachegrind/tests/clreq.vgtest
@@ -0,0 +1,3 @@
+prog: clreq
+vgopts: -q
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/dlclose.c b/cachegrind/tests/dlclose.c
new file mode 100644
index 0000000..9fee030
--- /dev/null
+++ b/cachegrind/tests/dlclose.c
@@ -0,0 +1,38 @@
+/* This exercises the code that was causing this bug:
+  
+     valgrind: vg_cachesim.c:389 (get_BBCC): Assertion `((Bool)0) == remove' 
+     failed.
+
+   in Cachegrind 1.0.0 and 1.0.1, that was caused by unloading symbols before
+   invalidating translations.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+
+int main(int argc, char **argv) {
+   void (*print_fn)(void);
+   const char *err;
+   /* Load the shared object that is built next to this test program. */
+   void *lib = dlopen("./myprint.so", RTLD_LAZY);
+
+   if (lib == NULL) {
+       fputs(dlerror(), stderr);
+       exit(1);
+   }
+
+   /* Lookup failure is detected via dlerror(), not dlsym()'s return value. */
+   print_fn = dlsym(lib, "myprint");
+   err = dlerror();
+   if (err != NULL) {
+       fprintf(stderr, "%s\n", err);
+       exit(1);
+   }
+
+   print_fn();
+   /* Unloading the library is where the assertion failure used to occur. */
+   dlclose(lib);
+   return 0;
+}
+
diff --git a/cachegrind/tests/dlclose.stderr.exp b/cachegrind/tests/dlclose.stderr.exp
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/dlclose.stderr.exp
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/dlclose.stdout.exp b/cachegrind/tests/dlclose.stdout.exp
new file mode 100644
index 0000000..890082f
--- /dev/null
+++ b/cachegrind/tests/dlclose.stdout.exp
@@ -0,0 +1 @@
+This is myprint!
diff --git a/cachegrind/tests/dlclose.vgtest b/cachegrind/tests/dlclose.vgtest
new file mode 100644
index 0000000..d61200f
--- /dev/null
+++ b/cachegrind/tests/dlclose.vgtest
@@ -0,0 +1,3 @@
+prog: dlclose
+stderr_filter: filter_cachesim_discards
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/filter_cachesim_discards b/cachegrind/tests/filter_cachesim_discards
new file mode 100755
index 0000000..d184c4f
--- /dev/null
+++ b/cachegrind/tests/filter_cachesim_discards
@@ -0,0 +1,8 @@
+#! /bin/sh
+# Runs the common Cachegrind stderr filter, then anonymises absolute paths.
+dir=`dirname "$0"`
+
+"$dir"/filter_stderr |
+
+# Anonymise paths like "/local/foo/bar/tests/baz/quux" (note "tests" is there)
+sed "s/\/.*\/tests\//\/...\/tests\//"
diff --git a/cachegrind/tests/filter_stderr b/cachegrind/tests/filter_stderr
new file mode 100755
index 0000000..a5bc1f4
--- /dev/null
+++ b/cachegrind/tests/filter_stderr
@@ -0,0 +1,20 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/../../tests/filter_stderr_basic                |
+
+# Remove "Cachegrind, ..." line and the following copyright line.
+sed "/^Cachegrind, a cache and branch-prediction profiler./ , /./ d" |
+
+# Remove numbers from I/D/L2 "refs:" lines
+sed "s/\(\(I\|D\|L2\) *refs:\)[ 0-9,()+rdw]*$/\1/"  |
+
+# Remove numbers from I1/D1/L2/L2i/L2d "misses:" and "miss rates:" lines
+sed "s/\(\(I1\|D1\|L2\|L2i\|L2d\) *\(misses\|miss rate\):\)[ 0-9,()+rdw%\.]*$/\1/" |
+
+# Remove CPUID warnings lines for P4s and other machines
+sed "/warning: Pentium 4 with 12 KB micro-op instruction trace cache/d" |
+sed "/Simulating a 16 KB I-cache with 32 B lines/d"   |
+sed "/warning: L3 cache detected but ignored/d" |
+sed "/Warning: Cannot auto-detect cache config on PPC.., using one or more defaults/d"
diff --git a/cachegrind/tests/myprint.c b/cachegrind/tests/myprint.c
new file mode 100644
index 0000000..e22ae87
--- /dev/null
+++ b/cachegrind/tests/myprint.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+/* Looked up by name with dlsym(handle, "myprint") in dlclose.c. */
+void myprint(void)
+{
+   puts("This is myprint!");   /* same text as dlclose.stdout.exp */
+}
diff --git a/cachegrind/tests/notpower2.stderr.exp b/cachegrind/tests/notpower2.stderr.exp
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/notpower2.stderr.exp
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/notpower2.vgtest b/cachegrind/tests/notpower2.vgtest
new file mode 100644
index 0000000..132cfe5
--- /dev/null
+++ b/cachegrind/tests/notpower2.vgtest
@@ -0,0 +1,3 @@
+prog: ../../tests/true
+vgopts: --I1=32768,8,64 --D1=24576,6,64 --L2=3145728,12,64
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/wrap5.stderr.exp b/cachegrind/tests/wrap5.stderr.exp
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/wrap5.stderr.exp
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/wrap5.stdout.exp b/cachegrind/tests/wrap5.stdout.exp
new file mode 100644
index 0000000..1924a84
--- /dev/null
+++ b/cachegrind/tests/wrap5.stdout.exp
@@ -0,0 +1,37 @@
+computing fact1(7)
+in wrapper1-pre:  fact(7)
+in wrapper2-pre:  fact(6)
+in wrapper1-pre:  fact(5)
+in wrapper2-pre:  fact(4)
+in wrapper1-pre:  fact(3)
+in wrapper2-pre:  fact(2)
+in wrapper1-pre:  fact(1)
+in wrapper2-pre:  fact(0)
+in wrapper2-post: fact(0) = 1
+in wrapper1-post: fact(1) = 1
+in wrapper2-post: fact(2) = 2
+in wrapper1-post: fact(3) = 6
+in wrapper2-pre:  fact(2)
+in wrapper1-pre:  fact(1)
+in wrapper2-pre:  fact(0)
+in wrapper2-post: fact(0) = 1
+in wrapper1-post: fact(1) = 1
+in wrapper2-post: fact(2) = 2
+in wrapper2-post: fact(4) = 32
+in wrapper1-post: fact(5) = 160
+in wrapper2-pre:  fact(2)
+in wrapper1-pre:  fact(1)
+in wrapper2-pre:  fact(0)
+in wrapper2-post: fact(0) = 1
+in wrapper1-post: fact(1) = 1
+in wrapper2-post: fact(2) = 2
+in wrapper2-post: fact(6) = 972
+in wrapper1-post: fact(7) = 6804
+in wrapper2-pre:  fact(2)
+in wrapper1-pre:  fact(1)
+in wrapper2-pre:  fact(0)
+in wrapper2-post: fact(0) = 1
+in wrapper1-post: fact(1) = 1
+in wrapper2-post: fact(2) = 2
+fact1(7) = 6806
+allocated 51 Lards
diff --git a/cachegrind/tests/wrap5.vgtest b/cachegrind/tests/wrap5.vgtest
new file mode 100644
index 0000000..51a172f
--- /dev/null
+++ b/cachegrind/tests/wrap5.vgtest
@@ -0,0 +1,2 @@
+prog: ../../memcheck/tests/wrap5
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/x86/.deps/fpu-28-108.Po b/cachegrind/tests/x86/.deps/fpu-28-108.Po
new file mode 100644
index 0000000..9ce06a8
--- /dev/null
+++ b/cachegrind/tests/x86/.deps/fpu-28-108.Po
@@ -0,0 +1 @@
+# dummy
diff --git a/cachegrind/tests/x86/.svn/dir-prop-base b/cachegrind/tests/x86/.svn/dir-prop-base
new file mode 100644
index 0000000..85cc8f1
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/dir-prop-base
@@ -0,0 +1,15 @@
+K 10
+svn:ignore
+V 124
+cachegrind.out
+cachegrind.out.*
+.deps
+fpu-28-108
+Makefile
+Makefile.in
+*.stderr.diff
+*.stderr.out
+*.stdout.diff
+*.stdout.out
+
+END
diff --git a/cachegrind/tests/x86/.svn/entries b/cachegrind/tests/x86/.svn/entries
new file mode 100644
index 0000000..9ca0ded
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/entries
@@ -0,0 +1,105 @@
+8
+
+dir
+9703
+svn://svn.valgrind.org/valgrind/trunk/cachegrind/tests/x86
+svn://svn.valgrind.org/valgrind
+
+
+
+2009-04-24T20:17:07.643509Z
+9611
+njn
+has-props
+
+svn:special svn:externals svn:needs-lock
+
+
+
+
+
+
+
+
+
+
+
+a5019735-40e9-0310-863c-91ae7b9d1cf9
+
+insn_sse2.stdout.exp
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+61463228868c638f8a4574c4279e8edd
+2005-03-10T23:23:45.107685Z
+3264
+sewardj
+
+fpu-28-108.vgtest
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+12310cf3e5db55ffd69ed2716240bb51
+2004-11-18T12:48:17.000000Z
+3041
+nethercote
+has-props
+
+fpu-28-108.stderr.exp
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+2f78182fb95570a369cc24cb627a182c
+2004-10-19T17:00:59.000000Z
+2804
+nethercote
+has-props
+
+fpu-28-108.S
+file
+
+
+
+
+2009-04-30T16:44:00.000000Z
+fdcd92732a780b55e2fc00156e2549a6
+2009-04-24T04:57:07.028318Z
+9601
+njn
+has-props
+
+filter_stderr
+file
+
+
+
+
+2009-03-13T17:30:10.000000Z
+1b5eaf301ff2feef25a57b6c63cd537b
+2004-10-19T17:00:59.000000Z
+2804
+nethercote
+has-props
+
+Makefile.am
+file
+
+
+
+
+2009-04-30T16:44:00.000000Z
+a5f02ccf0dc65dd417f55e0bdd526661
+2009-04-24T20:17:07.643509Z
+9611
+njn
+has-props
+
diff --git a/cachegrind/tests/x86/.svn/format b/cachegrind/tests/x86/.svn/format
new file mode 100644
index 0000000..45a4fb7
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/format
@@ -0,0 +1 @@
+8
diff --git a/cachegrind/tests/x86/.svn/prop-base/Makefile.am.svn-base b/cachegrind/tests/x86/.svn/prop-base/Makefile.am.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/prop-base/Makefile.am.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/x86/.svn/prop-base/filter_stderr.svn-base b/cachegrind/tests/x86/.svn/prop-base/filter_stderr.svn-base
new file mode 100644
index 0000000..fe7d6da
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/prop-base/filter_stderr.svn-base
@@ -0,0 +1,13 @@
+K 13
+svn:eol-style
+V 6
+native
+K 14
+svn:executable
+V 1
+*
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/x86/.svn/prop-base/fpu-28-108.S.svn-base b/cachegrind/tests/x86/.svn/prop-base/fpu-28-108.S.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/prop-base/fpu-28-108.S.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/x86/.svn/prop-base/fpu-28-108.stderr.exp.svn-base b/cachegrind/tests/x86/.svn/prop-base/fpu-28-108.stderr.exp.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/prop-base/fpu-28-108.stderr.exp.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/x86/.svn/prop-base/fpu-28-108.vgtest.svn-base b/cachegrind/tests/x86/.svn/prop-base/fpu-28-108.vgtest.svn-base
new file mode 100644
index 0000000..df54a06
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/prop-base/fpu-28-108.vgtest.svn-base
@@ -0,0 +1,9 @@
+K 13
+svn:eol-style
+V 6
+native
+K 12
+svn:keywords
+V 23
+author date id revision
+END
diff --git a/cachegrind/tests/x86/.svn/text-base/Makefile.am.svn-base b/cachegrind/tests/x86/.svn/text-base/Makefile.am.svn-base
new file mode 100644
index 0000000..12d96e9
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/text-base/Makefile.am.svn-base
@@ -0,0 +1,17 @@
+
+include $(top_srcdir)/Makefile.tool-tests.am
+
+noinst_SCRIPTS = filter_stderr
+
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	fpu-28-108.vgtest fpu-28-108.stderr.exp
+
+check_PROGRAMS = \
+	fpu-28-108
+
+
+AM_CFLAGS    += @FLAG_M32@ $(FLAG_MMMX) $(FLAG_MSSE)
+AM_CXXFLAGS  += @FLAG_M32@ $(FLAG_MMMX) $(FLAG_MSSE)
+AM_CCASFLAGS += @FLAG_M32@
+
+fpu_28_108_SOURCES	= fpu-28-108.S
diff --git a/cachegrind/tests/x86/.svn/text-base/filter_stderr.svn-base b/cachegrind/tests/x86/.svn/text-base/filter_stderr.svn-base
new file mode 100644
index 0000000..ed65c9f
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/text-base/filter_stderr.svn-base
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+# Use the generic Cachegrind stderr filter
+../filter_stderr
diff --git a/cachegrind/tests/x86/.svn/text-base/fpu-28-108.S.svn-base b/cachegrind/tests/x86/.svn/text-base/fpu-28-108.S.svn-base
new file mode 100644
index 0000000..54b2da6
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/text-base/fpu-28-108.S.svn-base
@@ -0,0 +1,26 @@
+/* Test 28 and 108 byte loads and stores.  (Just make sure program
+   runs without any assertion failures from V.) */
+
+/* Useful listing: 
+	gcc -o tests/fpu_28_108 tests/fpu_28_108.S -Wa,-a */
+
+#include "tests/asm.h"
+
+.data
+fooble:
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+bar:
+        
+.text
+.globl VG_SYM_ASM(main)
+VG_SYM_ASM(main):
+        fstsw   fooble
+        fsave   fooble
+        frstor  fooble
+        fstenv  fooble
+        fldenv  fooble
+        movl    $0, %eax
+        ret
+
diff --git a/cachegrind/tests/x86/.svn/text-base/fpu-28-108.stderr.exp.svn-base b/cachegrind/tests/x86/.svn/text-base/fpu-28-108.stderr.exp.svn-base
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/text-base/fpu-28-108.stderr.exp.svn-base
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/x86/.svn/text-base/fpu-28-108.vgtest.svn-base b/cachegrind/tests/x86/.svn/text-base/fpu-28-108.vgtest.svn-base
new file mode 100644
index 0000000..b2e138e
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/text-base/fpu-28-108.vgtest.svn-base
@@ -0,0 +1,2 @@
+prog: fpu-28-108
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/x86/.svn/text-base/insn_sse2.stdout.exp.svn-base b/cachegrind/tests/x86/.svn/text-base/insn_sse2.stdout.exp.svn-base
new file mode 100644
index 0000000..344df89
--- /dev/null
+++ b/cachegrind/tests/x86/.svn/text-base/insn_sse2.stdout.exp.svn-base
@@ -0,0 +1,302 @@
+addpd_1 ... ok
+addpd_2 ... ok
+addsd_1 ... ok
+addsd_2 ... ok
+andpd_1 ... ok
+andpd_2 ... ok
+andnpd_1 ... ok
+andnpd_2 ... ok
+cmpeqpd_1 ... ok
+cmpeqpd_2 ... ok
+cmpltpd_1 ... ok
+cmpltpd_2 ... ok
+cmplepd_1 ... ok
+cmplepd_2 ... ok
+cmpneqpd_1 ... ok
+cmpneqpd_2 ... ok
+cmpnltpd_1 ... ok
+cmpnltpd_2 ... ok
+cmpnlepd_1 ... ok
+cmpnlepd_2 ... ok
+cmpeqsd_1 ... ok
+cmpeqsd_2 ... ok
+cmpltsd_1 ... ok
+cmpltsd_2 ... ok
+cmplesd_1 ... ok
+cmplesd_2 ... ok
+cmpneqsd_1 ... ok
+cmpneqsd_2 ... ok
+cmpnltsd_1 ... ok
+cmpnltsd_2 ... ok
+cmpnlesd_1 ... ok
+cmpnlesd_2 ... ok
+comisd_1 ... ok
+comisd_2 ... ok
+comisd_3 ... ok
+comisd_4 ... ok
+comisd_5 ... ok
+comisd_6 ... ok
+cvtdq2pd_1 ... ok
+cvtdq2pd_2 ... ok
+cvtdq2ps_1 ... ok
+cvtdq2ps_2 ... ok
+cvtpd2dq_1 ... ok
+cvtpd2dq_2 ... ok
+cvtpd2pi_1 ... ok
+cvtpd2pi_2 ... ok
+cvtpd2ps_1 ... ok
+cvtpd2ps_2 ... ok
+cvtpi2pd_1 ... ok
+cvtpi2pd_2 ... ok
+cvtps2dq_1 ... ok
+cvtps2dq_2 ... ok
+cvtps2pd_1 ... ok
+cvtps2pd_2 ... ok
+cvtsd2si_1 ... ok
+cvtsd2si_2 ... ok
+cvtsd2ss_1 ... ok
+cvtsd2ss_2 ... ok
+cvtsi2sd_1 ... ok
+cvtsi2sd_2 ... ok
+cvtss2sd_1 ... ok
+cvtss2sd_2 ... ok
+cvttpd2pi_1 ... ok
+cvttpd2pi_2 ... ok
+cvttpd2dq_1 ... ok
+cvttpd2dq_2 ... ok
+cvttps2dq_1 ... ok
+cvttps2dq_2 ... ok
+cvttsd2si_1 ... ok
+cvttsd2si_2 ... ok
+divpd_1 ... ok
+divpd_2 ... ok
+divsd_1 ... ok
+divsd_2 ... ok
+lfence_1 ... ok
+maxpd_1 ... ok
+maxpd_2 ... ok
+maxsd_1 ... ok
+maxsd_2 ... ok
+mfence_1 ... ok
+minpd_1 ... ok
+minpd_2 ... ok
+minsd_1 ... ok
+minsd_2 ... ok
+movapd_1 ... ok
+movapd_2 ... ok
+movd_1 ... ok
+movd_2 ... ok
+movd_3 ... ok
+movd_4 ... ok
+movdqa_1 ... ok
+movdqa_2 ... ok
+movdqa_3 ... ok
+movdqu_1 ... ok
+movdqu_2 ... ok
+movdqu_3 ... ok
+movdq2q_1 ... ok
+movhpd_1 ... ok
+movhpd_2 ... ok
+movlpd_1 ... ok
+movlpd_2 ... ok
+movmskpd_1 ... ok
+movntdq_1 ... ok
+movnti_1 ... ok
+movntpd_1 ... ok
+movq2dq_1 ... ok
+movsd_1 ... ok
+movsd_2 ... ok
+movsd_3 ... ok
+movupd_1 ... ok
+movupd_2 ... ok
+mulpd_1 ... ok
+mulpd_2 ... ok
+mulsd_1 ... ok
+mulsd_2 ... ok
+orpd_1 ... ok
+orpd_2 ... ok
+packssdw_1 ... ok
+packssdw_2 ... ok
+packsswb_1 ... ok
+packsswb_2 ... ok
+packuswb_1 ... ok
+packuswb_2 ... ok
+paddb_1 ... ok
+paddb_2 ... ok
+paddd_1 ... ok
+paddd_2 ... ok
+paddq_1 ... ok
+paddq_2 ... ok
+paddq_3 ... ok
+paddq_4 ... ok
+paddsb_1 ... ok
+paddsb_2 ... ok
+paddsw_1 ... ok
+paddsw_2 ... ok
+paddusb_1 ... ok
+paddusb_2 ... ok
+paddusw_1 ... ok
+paddusw_2 ... ok
+paddw_1 ... ok
+paddw_2 ... ok
+pand_1 ... ok
+pand_2 ... ok
+pandn_1 ... ok
+pandn_2 ... ok
+pavgb_1 ... ok
+pavgb_2 ... ok
+pavgw_1 ... ok
+pavgw_2 ... ok
+pcmpeqb_1 ... ok
+pcmpeqb_2 ... ok
+pcmpeqd_1 ... ok
+pcmpeqd_2 ... ok
+pcmpeqw_1 ... ok
+pcmpeqw_2 ... ok
+pcmpgtb_1 ... ok
+pcmpgtb_2 ... ok
+pcmpgtd_1 ... ok
+pcmpgtd_2 ... ok
+pcmpgtw_1 ... ok
+pcmpgtw_2 ... ok
+pextrw_1 ... ok
+pextrw_2 ... ok
+pextrw_3 ... ok
+pextrw_4 ... ok
+pextrw_5 ... ok
+pextrw_6 ... ok
+pextrw_7 ... ok
+pextrw_8 ... ok
+pinsrw_1 ... ok
+pinsrw_2 ... ok
+pinsrw_3 ... ok
+pinsrw_4 ... ok
+pinsrw_5 ... ok
+pinsrw_6 ... ok
+pinsrw_7 ... ok
+pinsrw_8 ... ok
+pinsrw_9 ... ok
+pinsrw_10 ... ok
+pinsrw_11 ... ok
+pinsrw_12 ... ok
+pinsrw_13 ... ok
+pinsrw_14 ... ok
+pinsrw_15 ... ok
+pinsrw_16 ... ok
+pmaddwd_1 ... ok
+pmaddwd_2 ... ok
+pmaxsw_1 ... ok
+pmaxsw_2 ... ok
+pmaxub_1 ... ok
+pmaxub_2 ... ok
+pminsw_1 ... ok
+pminsw_2 ... ok
+pminub_1 ... ok
+pminub_2 ... ok
+pmovmskb_1 ... ok
+pmulhuw_1 ... ok
+pmulhuw_2 ... ok
+pmulhw_1 ... ok
+pmulhw_2 ... ok
+pmullw_1 ... ok
+pmullw_2 ... ok
+pmuludq_1 ... ok
+pmuludq_2 ... ok
+pmuludq_3 ... ok
+pmuludq_4 ... ok
+por_1 ... ok
+por_2 ... ok
+psadbw_1 ... ok
+psadbw_2 ... ok
+pshufd_1 ... ok
+pshufd_2 ... ok
+pshufhw_1 ... ok
+pshufhw_2 ... ok
+pshuflw_1 ... ok
+pshuflw_2 ... ok
+pslld_1 ... ok
+pslld_2 ... ok
+pslld_3 ... ok
+pslldq_1 ... ok
+pslldq_2 ... ok
+psllq_1 ... ok
+psllq_2 ... ok
+psllq_3 ... ok
+psllw_1 ... ok
+psllw_2 ... ok
+psllw_3 ... ok
+psrad_1 ... ok
+psrad_2 ... ok
+psrad_3 ... ok
+psraw_1 ... ok
+psraw_2 ... ok
+psraw_3 ... ok
+psrld_1 ... ok
+psrld_2 ... ok
+psrld_3 ... ok
+psrldq_1 ... ok
+psrldq_2 ... ok
+psrlq_1 ... ok
+psrlq_2 ... ok
+psrlq_3 ... ok
+psrlw_1 ... ok
+psrlw_2 ... ok
+psrlw_3 ... ok
+psubb_1 ... ok
+psubb_2 ... ok
+psubd_1 ... ok
+psubd_2 ... ok
+psubq_1 ... ok
+psubq_2 ... ok
+psubq_3 ... ok
+psubq_4 ... ok
+psubsb_1 ... ok
+psubsb_2 ... ok
+psubsw_1 ... ok
+psubsw_2 ... ok
+psubusb_1 ... ok
+psubusb_2 ... ok
+psubusw_1 ... ok
+psubusw_2 ... ok
+psubw_1 ... ok
+psubw_2 ... ok
+punpckhbw_1 ... ok
+punpckhbw_2 ... ok
+punpckhdq_1 ... ok
+punpckhdq_2 ... ok
+punpckhqdq_1 ... ok
+punpckhqdq_2 ... ok
+punpckhwd_1 ... ok
+punpckhwd_2 ... ok
+punpcklbw_1 ... ok
+punpcklbw_2 ... ok
+punpckldq_1 ... ok
+punpckldq_2 ... ok
+punpcklqdq_1 ... ok
+punpcklqdq_2 ... ok
+punpcklwd_1 ... ok
+punpcklwd_2 ... ok
+pxor_1 ... ok
+pxor_2 ... ok
+shufpd_1 ... ok
+shufpd_2 ... ok
+sqrtpd_1 ... ok
+sqrtpd_2 ... ok
+sqrtsd_1 ... ok
+sqrtsd_2 ... ok
+subpd_1 ... ok
+subpd_2 ... ok
+subsd_1 ... ok
+subsd_2 ... ok
+ucomisd_1 ... ok
+ucomisd_2 ... ok
+ucomisd_3 ... ok
+ucomisd_4 ... ok
+ucomisd_5 ... ok
+ucomisd_6 ... ok
+unpckhpd_1 ... ok
+unpckhpd_2 ... ok
+unpcklpd_1 ... ok
+unpcklpd_2 ... ok
+xorpd_1 ... ok
+xorpd_2 ... ok
diff --git a/cachegrind/tests/x86/Makefile b/cachegrind/tests/x86/Makefile
new file mode 100644
index 0000000..178d042
--- /dev/null
+++ b/cachegrind/tests/x86/Makefile
@@ -0,0 +1,569 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# cachegrind/tests/x86/Makefile.  Generated from Makefile.in by configure.
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+
+# This file is used for tool tests, and also in perf/Makefile.am.
+
+
+pkgdatadir = $(datadir)/valgrind
+pkglibdir = $(libdir)/valgrind
+pkgincludedir = $(includedir)/valgrind
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = x86_64-unknown-linux-gnu
+host_triplet = x86_64-unknown-linux-gnu
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+	$(top_srcdir)/Makefile.flags.am \
+	$(top_srcdir)/Makefile.tool-tests.am
+check_PROGRAMS = fpu-28-108$(EXEEXT)
+subdir = cachegrind/tests/x86
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+am_fpu_28_108_OBJECTS = fpu-28-108.$(OBJEXT)
+fpu_28_108_OBJECTS = $(am_fpu_28_108_OBJECTS)
+fpu_28_108_LDADD = $(LDADD)
+SCRIPTS = $(noinst_SCRIPTS)
+DEFAULT_INCLUDES = -I. -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+SOURCES = $(fpu_28_108_SOURCES)
+DIST_SOURCES = $(fpu_28_108_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = ${SHELL} /home/steph/compile/valgrind/missing --run aclocal-1.10
+AMTAR = ${SHELL} /home/steph/compile/valgrind/missing --run tar
+AR = /usr/bin/ar
+AUTOCONF = ${SHELL} /home/steph/compile/valgrind/missing --run autoconf
+AUTOHEADER = ${SHELL} /home/steph/compile/valgrind/missing --run autoheader
+AUTOMAKE = ${SHELL} /home/steph/compile/valgrind/missing --run automake-1.10
+AWK = gawk
+BOOST_CFLAGS = 
+BOOST_LIBS = -lboost_thread-mt -m64
+CC = gcc
+CCAS = gcc
+CCASDEPMODE = depmode=gcc3
+CCASFLAGS = -Wno-long-long
+CCDEPMODE = depmode=gcc3
+CFLAGS = -Wno-long-long -Wno-pointer-sign -Wdeclaration-after-statement -fno-stack-protector
+CPP = gcc -E
+CPPFLAGS = 
+CXX = g++
+CXXDEPMODE = depmode=gcc3
+CXXFLAGS = -g -O2
+CYGPATH_W = echo
+DEFAULT_SUPP = exp-ptrcheck.supp xfree-3.supp xfree-4.supp glibc-2.X-drd.supp glibc-2.34567-NPTL-helgrind.supp glibc-2.X.supp 
+DEFS = -DHAVE_CONFIG_H
+DEPDIR = .deps
+DIFF = diff -u
+DISTCHECK_CONFIGURE_FLAGS = --with-vex=$(top_srcdir)/VEX
+ECHO_C = 
+ECHO_N = -n
+ECHO_T = 
+EGREP = /bin/grep -E
+EXEEXT = 
+FLAG_FNO_STACK_PROTECTOR = -fno-stack-protector
+FLAG_M32 = -m32
+FLAG_M64 = -m64
+FLAG_MAIX32 = 
+FLAG_MAIX64 = 
+FLAG_MMMX = -mmmx
+FLAG_MSSE = -msse
+FLAG_UNLIMITED_INLINE_UNIT_GROWTH = --param inline-unit-growth=900
+FLAG_WDECL_AFTER_STMT = -Wdeclaration-after-statement
+FLAG_W_EXTRA = -Wextra
+FLAG_W_NO_FORMAT_ZERO_LENGTH = -Wno-format-zero-length
+GDB = /usr/bin/gdb
+GLIBC_VERSION = 2.8
+GREP = /bin/grep
+INSTALL = /usr/bin/install -c
+INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_SCRIPT = ${INSTALL}
+INSTALL_STRIP_PROGRAM = $(install_sh) -c -s
+LDFLAGS = 
+LIBOBJS = 
+LIBS = 
+LN_S = ln -s
+LTLIBOBJS = 
+MAINT = #
+MAKEINFO = ${SHELL} /home/steph/compile/valgrind/missing --run makeinfo
+MKDIR_P = /bin/mkdir -p
+MPI_CC = mpicc
+OBJEXT = o
+PACKAGE = valgrind
+PACKAGE_BUGREPORT = valgrind-users@lists.sourceforge.net
+PACKAGE_NAME = Valgrind
+PACKAGE_STRING = Valgrind 3.5.0.SVN
+PACKAGE_TARNAME = valgrind
+PACKAGE_VERSION = 3.5.0.SVN
+PATH_SEPARATOR = :
+PERL = /usr/bin/perl
+PKG_CONFIG = /usr/bin/pkg-config
+PREFERRED_STACK_BOUNDARY = 
+QTCORE_CFLAGS = -DQT_SHARED -I/usr/include/QtCore  
+QTCORE_LIBS = -lQtCore  
+RANLIB = ranlib
+SET_MAKE = 
+SHELL = /bin/sh
+STRIP = 
+VALT_LOAD_ADDRESS = 0x38000000
+VERSION = 3.5.0.SVN
+VEX_DIR = $(top_srcdir)/VEX
+VGCONF_ARCH_PRI = amd64
+VGCONF_OS = linux
+VGCONF_PLATFORM_PRI_CAPS = AMD64_LINUX
+VGCONF_PLATFORM_SEC_CAPS = 
+abs_builddir = /home/steph/compile/valgrind/cachegrind/tests/x86
+abs_srcdir = /home/steph/compile/valgrind/cachegrind/tests/x86
+abs_top_builddir = /home/steph/compile/valgrind
+abs_top_srcdir = /home/steph/compile/valgrind
+ac_ct_CC = gcc
+ac_ct_CXX = g++
+am__include = include
+am__leading_dot = .
+am__quote = 
+am__tar = ${AMTAR} chof - "$$tardir"
+am__untar = ${AMTAR} xf -
+bindir = ${exec_prefix}/bin
+build = x86_64-unknown-linux-gnu
+build_alias = 
+build_cpu = x86_64
+build_os = linux-gnu
+build_vendor = unknown
+builddir = .
+datadir = ${datarootdir}
+datarootdir = ${prefix}/share
+docdir = ${datarootdir}/doc/${PACKAGE_TARNAME}
+dvidir = ${docdir}
+exec_prefix = ${prefix}
+host = x86_64-unknown-linux-gnu
+host_alias = 
+host_cpu = x86_64
+host_os = linux-gnu
+host_vendor = unknown
+htmldir = ${docdir}
+includedir = ${prefix}/include
+infodir = ${datarootdir}/info
+install_sh = $(SHELL) /home/steph/compile/valgrind/install-sh
+libdir = ${exec_prefix}/lib
+libexecdir = ${exec_prefix}/libexec
+localedir = ${datarootdir}/locale
+localstatedir = ${prefix}/var
+mandir = ${datarootdir}/man
+mkdir_p = /bin/mkdir -p
+oldincludedir = /usr/include
+pdfdir = ${docdir}
+prefix = /usr/local
+program_transform_name = s,x,x,
+psdir = ${docdir}
+sbindir = ${exec_prefix}/sbin
+sharedstatedir = ${prefix}/com
+srcdir = .
+sysconfdir = ${prefix}/etc
+target_alias = 
+top_builddir = ../../..
+top_srcdir = ../../..
+
+# Baseline flags for all compilations.  Aim here is to maximise
+# performance and get whatever useful warnings we can out of gcc.
+AM_CFLAGS_BASE = -O2 -g -Wmissing-prototypes -Wall -Wshadow \
+                 -Wpointer-arith -Wstrict-prototypes -Wmissing-declarations \
+		 -Wno-format-zero-length \
+                 -fno-strict-aliasing
+
+
+# These flags are used for building the preload shared objects.
+# The aim is to give reasonable performance but also to have good
+# stack traces, since users often see stack traces extending 
+# into (and through) the preloads.
+AM_CFLAGS_PIC = -O -g -fpic -fno-omit-frame-pointer -fno-strict-aliasing
+
+# Flags for specific targets.
+#
+# Nb: the AM_CPPFLAGS_* values are suitable for building tools and auxprogs.
+# For building the core, coregrind/Makefile.am files add some extra things.
+#
+# Also: in newer versions of automake (1.10 onwards?) asm files ending with
+# '.S' are considered "pre-processed" (as opposed to those ending in '.s')
+# and so the CPPFLAGS are passed to the assembler.  But this is not true for
+# older automakes (e.g. 1.8.5, 1.9.6), sigh.  So we include
+# AM_CPPFLAGS_<PLATFORM> in each AM_CCASFLAGS_<PLATFORM> variable.  This
+# means some of the flags are duplicated on systems with newer versions of
+# automake, but this does not really matter and seems hard to avoid.
+AM_CPPFLAGS_COMMON = \
+		-I$(top_srcdir) \
+		-I$(top_srcdir)/include \
+		-I$(top_srcdir)/VEX/pub
+
+AM_FLAG_M3264_X86_LINUX = -m32
+AM_CPPFLAGS_X86_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_x86=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_x86_linux=1
+
+AM_CFLAGS_X86_LINUX = -m32  \
+			 	$(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_X86_LINUX = $(AM_CPPFLAGS_X86_LINUX) -m32 -g
+AM_FLAG_M3264_AMD64_LINUX = -m64
+AM_CPPFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_amd64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_amd64_linux=1
+
+AM_CFLAGS_AMD64_LINUX = -m64 -fomit-frame-pointer \
+				 $(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_AMD64_LINUX) -m64 -g
+AM_FLAG_M3264_PPC32_LINUX = -m32
+AM_CPPFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_COMMON) \
+		-DVGA_ppc32=1 \
+		-DVGO_linux=1 \
+		-DVGP_ppc32_linux=1
+
+AM_CFLAGS_PPC32_LINUX = -m32 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_PPC32_LINUX) -m32 -g
+AM_FLAG_M3264_PPC64_LINUX = -m64
+AM_CPPFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_ppc64_linux=1
+
+AM_CFLAGS_PPC64_LINUX = -m64 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_PPC64_LINUX) -m64 -g
+AM_FLAG_M3264_PPC32_AIX5 = 
+AM_CPPFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc32=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc32_aix5=1
+
+AM_CFLAGS_PPC32_AIX5 =  -mcpu=powerpc $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_PPC32_AIX5) \
+			     -mcpu=powerpc -g
+
+AM_FLAG_M3264_PPC64_AIX5 = 
+AM_CPPFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc64_aix5=1
+
+AM_CFLAGS_PPC64_AIX5 =  -mcpu=powerpc64 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_PPC64_AIX5) \
+			     -mcpu=powerpc64 -g
+
+
+# Flags for the primary target.  These must be used to build the
+# regtests and performance tests.  In fact, these must be used to
+# build anything which is built only once on a dual-arch build.
+#
+AM_FLAG_M3264_PRI = $(AM_FLAG_M3264_AMD64_LINUX)
+AM_CPPFLAGS_PRI = $(AM_CPPFLAGS_AMD64_LINUX)
+AM_CFLAGS_PRI = $(AM_CFLAGS_AMD64_LINUX)
+AM_CCASFLAGS_PRI = $(AM_CCASFLAGS_AMD64_LINUX)
+AM_FLAG_M3264_SEC = 
+#AM_FLAG_M3264_SEC = $(AM_FLAG_M3264_)
+
+# Baseline link flags for making dynamic shared objects.
+#
+PRELOAD_LDFLAGS_COMMON_LINUX = -nodefaultlibs -shared -Wl,-z,interpose,-z,initfirst
+PRELOAD_LDFLAGS_COMMON_AIX5 = -nodefaultlibs -shared -Wl,-G -Wl,-bnogc
+PRELOAD_LDFLAGS_X86_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m32
+PRELOAD_LDFLAGS_AMD64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m64
+PRELOAD_LDFLAGS_PPC32_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m32
+PRELOAD_LDFLAGS_PPC64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) -m64
+PRELOAD_LDFLAGS_PPC32_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  
+PRELOAD_LDFLAGS_PPC64_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/include \
+		-I$(top_srcdir)/coregrind -I$(top_builddir)/include \
+		-I$(top_srcdir)/VEX/pub \
+		-DVGA_$(VGCONF_ARCH_PRI)=1 \
+		-DVGO_$(VGCONF_OS)=1 \
+		-DVGP_$(VGCONF_ARCH_PRI)_$(VGCONF_OS)=1
+
+# Nb: Tools need to augment these flags with an arch-selection option, such
+# as $(AM_FLAG_M3264_PRI).
+AM_CFLAGS = -Winline -Wall -Wshadow -g -m32 $(FLAG_MMMX) \
+	$(FLAG_MSSE)
+AM_CXXFLAGS = -Winline -Wall -Wshadow -g -m32 $(FLAG_MMMX) \
+	$(FLAG_MSSE)
+# Include AM_CPPFLAGS in AM_CCASFLAGS to allow for older versions of
+# automake;  see comments in Makefile.flags.am for more detail.
+AM_CCASFLAGS = $(AM_CPPFLAGS) -m32
+noinst_SCRIPTS = filter_stderr
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	fpu-28-108.vgtest fpu-28-108.stderr.exp
+
+fpu_28_108_SOURCES = fpu-28-108.S
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .S .o .obj
+$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(top_srcdir)/Makefile.tool-tests.am $(top_srcdir)/Makefile.flags.am $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  cachegrind/tests/x86/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  cachegrind/tests/x86/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: # $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): # $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+clean-checkPROGRAMS:
+	-test -z "$(check_PROGRAMS)" || rm -f $(check_PROGRAMS)
+fpu-28-108$(EXEEXT): $(fpu_28_108_OBJECTS) $(fpu_28_108_DEPENDENCIES) 
+	@rm -f fpu-28-108$(EXEEXT)
+	$(LINK) $(fpu_28_108_OBJECTS) $(fpu_28_108_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+include ./$(DEPDIR)/fpu-28-108.Po
+
+.S.o:
+	$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+#	source='$<' object='$@' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) \
+#	$(CPPASCOMPILE) -c -o $@ $<
+
+.S.obj:
+	$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+#	source='$<' object='$@' libtool=no \
+#	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) \
+#	$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+	$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+check: check-am
+all-am: Makefile $(SCRIPTS)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean \
+	clean-checkPROGRAMS clean-generic ctags distclean \
+	distclean-compile distclean-generic distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
+	uninstall-am
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/cachegrind/tests/x86/Makefile.am b/cachegrind/tests/x86/Makefile.am
new file mode 100644
index 0000000..12d96e9
--- /dev/null
+++ b/cachegrind/tests/x86/Makefile.am
@@ -0,0 +1,17 @@
+
+include $(top_srcdir)/Makefile.tool-tests.am
+
+noinst_SCRIPTS = filter_stderr
+
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	fpu-28-108.vgtest fpu-28-108.stderr.exp
+
+check_PROGRAMS = \
+	fpu-28-108
+
+
+AM_CFLAGS    += @FLAG_M32@ $(FLAG_MMMX) $(FLAG_MSSE)
+AM_CXXFLAGS  += @FLAG_M32@ $(FLAG_MMMX) $(FLAG_MSSE)
+AM_CCASFLAGS += @FLAG_M32@
+
+fpu_28_108_SOURCES	= fpu-28-108.S
diff --git a/cachegrind/tests/x86/Makefile.in b/cachegrind/tests/x86/Makefile.in
new file mode 100644
index 0000000..d390ddb
--- /dev/null
+++ b/cachegrind/tests/x86/Makefile.in
@@ -0,0 +1,569 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# This file is used for tool tests, and also in perf/Makefile.am.
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+	$(top_srcdir)/Makefile.flags.am \
+	$(top_srcdir)/Makefile.tool-tests.am
+check_PROGRAMS = fpu-28-108$(EXEEXT)
+subdir = cachegrind/tests/x86
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+am_fpu_28_108_OBJECTS = fpu-28-108.$(OBJEXT)
+fpu_28_108_OBJECTS = $(am_fpu_28_108_OBJECTS)
+fpu_28_108_LDADD = $(LDADD)
+SCRIPTS = $(noinst_SCRIPTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+SOURCES = $(fpu_28_108_SOURCES)
+DIST_SOURCES = $(fpu_28_108_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CFLAGS = @BOOST_CFLAGS@
+BOOST_LIBS = @BOOST_LIBS@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFAULT_SUPP = @DEFAULT_SUPP@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DIFF = @DIFF@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FLAG_FNO_STACK_PROTECTOR = @FLAG_FNO_STACK_PROTECTOR@
+FLAG_M32 = @FLAG_M32@
+FLAG_M64 = @FLAG_M64@
+FLAG_MAIX32 = @FLAG_MAIX32@
+FLAG_MAIX64 = @FLAG_MAIX64@
+FLAG_MMMX = @FLAG_MMMX@
+FLAG_MSSE = @FLAG_MSSE@
+FLAG_UNLIMITED_INLINE_UNIT_GROWTH = @FLAG_UNLIMITED_INLINE_UNIT_GROWTH@
+FLAG_WDECL_AFTER_STMT = @FLAG_WDECL_AFTER_STMT@
+FLAG_W_EXTRA = @FLAG_W_EXTRA@
+FLAG_W_NO_FORMAT_ZERO_LENGTH = @FLAG_W_NO_FORMAT_ZERO_LENGTH@
+GDB = @GDB@
+GLIBC_VERSION = @GLIBC_VERSION@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPI_CC = @MPI_CC@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+PKG_CONFIG = @PKG_CONFIG@
+PREFERRED_STACK_BOUNDARY = @PREFERRED_STACK_BOUNDARY@
+QTCORE_CFLAGS = @QTCORE_CFLAGS@
+QTCORE_LIBS = @QTCORE_LIBS@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VALT_LOAD_ADDRESS = @VALT_LOAD_ADDRESS@
+VERSION = @VERSION@
+VEX_DIR = @VEX_DIR@
+VGCONF_ARCH_PRI = @VGCONF_ARCH_PRI@
+VGCONF_OS = @VGCONF_OS@
+VGCONF_PLATFORM_PRI_CAPS = @VGCONF_PLATFORM_PRI_CAPS@
+VGCONF_PLATFORM_SEC_CAPS = @VGCONF_PLATFORM_SEC_CAPS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+
+# Baseline flags for all compilations.  Aim here is to maximise
+# performance and get whatever useful warnings we can out of gcc.
+AM_CFLAGS_BASE = -O2 -g -Wmissing-prototypes -Wall -Wshadow \
+                 -Wpointer-arith -Wstrict-prototypes -Wmissing-declarations \
+		 @FLAG_W_NO_FORMAT_ZERO_LENGTH@ \
+                 -fno-strict-aliasing
+
+
+# These flags are used for building the preload shared objects.
+# The aim is to give reasonable performance but also to have good
+# stack traces, since users often see stack traces extending 
+# into (and through) the preloads.
+AM_CFLAGS_PIC = -O -g -fpic -fno-omit-frame-pointer -fno-strict-aliasing
+
+# Flags for specific targets.
+#
+# Nb: the AM_CPPFLAGS_* values are suitable for building tools and auxprogs.
+# For building the core, coregrind/Makefile.am files add some extra things.
+#
+# Also: in newer versions of automake (1.10 onwards?) asm files ending with
+# '.S' are considered "pre-processed" (as opposed to those ending in '.s')
+# and so the CPPFLAGS are passed to the assembler.  But this is not true for
+# older automakes (e.g. 1.8.5, 1.9.6), sigh.  So we include
+# AM_CPPFLAGS_<PLATFORM> in each AM_CCASFLAGS_<PLATFORM> variable.  This
+# means some of the flags are duplicated on systems with newer versions of
+# automake, but this does not really matter and seems hard to avoid.
+AM_CPPFLAGS_COMMON = \
+		-I$(top_srcdir) \
+		-I$(top_srcdir)/include \
+		-I@VEX_DIR@/pub
+
+AM_FLAG_M3264_X86_LINUX = @FLAG_M32@
+AM_CPPFLAGS_X86_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_x86=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_x86_linux=1
+
+AM_CFLAGS_X86_LINUX = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY@ \
+			 	$(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_X86_LINUX = $(AM_CPPFLAGS_X86_LINUX) @FLAG_M32@ -g
+AM_FLAG_M3264_AMD64_LINUX = @FLAG_M64@
+AM_CPPFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_amd64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_amd64_linux=1
+
+AM_CFLAGS_AMD64_LINUX = @FLAG_M64@ -fomit-frame-pointer \
+				@PREFERRED_STACK_BOUNDARY@ $(AM_CFLAGS_BASE)
+
+AM_CCASFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_AMD64_LINUX) @FLAG_M64@ -g
+AM_FLAG_M3264_PPC32_LINUX = @FLAG_M32@
+AM_CPPFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_COMMON) \
+		-DVGA_ppc32=1 \
+		-DVGO_linux=1 \
+		-DVGP_ppc32_linux=1
+
+AM_CFLAGS_PPC32_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_PPC32_LINUX) @FLAG_M32@ -g
+AM_FLAG_M3264_PPC64_LINUX = @FLAG_M64@
+AM_CPPFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_linux=1 \
+			    -DVGP_ppc64_linux=1
+
+AM_CFLAGS_PPC64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_PPC64_LINUX) @FLAG_M64@ -g
+AM_FLAG_M3264_PPC32_AIX5 = @FLAG_MAIX32@
+AM_CPPFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc32=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc32_aix5=1
+
+AM_CFLAGS_PPC32_AIX5 = @FLAG_MAIX32@ -mcpu=powerpc $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_PPC32_AIX5) \
+			    @FLAG_MAIX32@ -mcpu=powerpc -g
+
+AM_FLAG_M3264_PPC64_AIX5 = @FLAG_MAIX64@
+AM_CPPFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_COMMON) \
+			    -DVGA_ppc64=1 \
+			    -DVGO_aix5=1 \
+			    -DVGP_ppc64_aix5=1
+
+AM_CFLAGS_PPC64_AIX5 = @FLAG_MAIX64@ -mcpu=powerpc64 $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_PPC64_AIX5) \
+			    @FLAG_MAIX64@ -mcpu=powerpc64 -g
+
+
+# Flags for the primary target.  These must be used to build the
+# regtests and performance tests.  In fact, these must be used to
+# build anything which is built only once on a dual-arch build.
+#
+AM_FLAG_M3264_PRI = $(AM_FLAG_M3264_@VGCONF_PLATFORM_PRI_CAPS@)
+AM_CPPFLAGS_PRI = $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+AM_CFLAGS_PRI = $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+AM_CCASFLAGS_PRI = $(AM_CCASFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+@VGCONF_HAVE_PLATFORM_SEC_CAPS_FALSE@AM_FLAG_M3264_SEC = 
+@VGCONF_HAVE_PLATFORM_SEC_CAPS_TRUE@AM_FLAG_M3264_SEC = $(AM_FLAG_M3264_@VGCONF_PLATFORM_SEC_CAPS@)
+
+# Baseline link flags for making dynamic shared objects.
+#
+PRELOAD_LDFLAGS_COMMON_LINUX = -nodefaultlibs -shared -Wl,-z,interpose,-z,initfirst
+PRELOAD_LDFLAGS_COMMON_AIX5 = -nodefaultlibs -shared -Wl,-G -Wl,-bnogc
+PRELOAD_LDFLAGS_X86_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
+PRELOAD_LDFLAGS_AMD64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
+PRELOAD_LDFLAGS_PPC32_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
+PRELOAD_LDFLAGS_PPC64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
+PRELOAD_LDFLAGS_PPC32_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  @FLAG_MAIX32@
+PRELOAD_LDFLAGS_PPC64_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5)  @FLAG_MAIX64@
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/include \
+		-I$(top_srcdir)/coregrind -I$(top_builddir)/include \
+		-I@VEX_DIR@/pub \
+		-DVGA_$(VGCONF_ARCH_PRI)=1 \
+		-DVGO_$(VGCONF_OS)=1 \
+		-DVGP_$(VGCONF_ARCH_PRI)_$(VGCONF_OS)=1
+
+# Nb: Tools need to augment these flags with an arch-selection option, such
+# as $(AM_FLAG_M3264_PRI).
+AM_CFLAGS = -Winline -Wall -Wshadow -g @FLAG_M32@ $(FLAG_MMMX) \
+	$(FLAG_MSSE)
+AM_CXXFLAGS = -Winline -Wall -Wshadow -g @FLAG_M32@ $(FLAG_MMMX) \
+	$(FLAG_MSSE)
+# Include AM_CPPFLAGS in AM_CCASFLAGS to allow for older versions of
+# automake;  see comments in Makefile.flags.am for more detail.
+AM_CCASFLAGS = $(AM_CPPFLAGS) @FLAG_M32@
+noinst_SCRIPTS = filter_stderr
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	fpu-28-108.vgtest fpu-28-108.stderr.exp
+
+fpu_28_108_SOURCES = fpu-28-108.S
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .S .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/Makefile.tool-tests.am $(top_srcdir)/Makefile.flags.am $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  cachegrind/tests/x86/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  cachegrind/tests/x86/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+clean-checkPROGRAMS:
+	-test -z "$(check_PROGRAMS)" || rm -f $(check_PROGRAMS)
+fpu-28-108$(EXEEXT): $(fpu_28_108_OBJECTS) $(fpu_28_108_DEPENDENCIES) 
+	@rm -f fpu-28-108$(EXEEXT)
+	$(LINK) $(fpu_28_108_OBJECTS) $(fpu_28_108_LDADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fpu-28-108.Po@am__quote@
+
+.S.o:
+@am__fastdepCCAS_TRUE@	$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@	$(CPPASCOMPILE) -c -o $@ $<
+
+.S.obj:
+@am__fastdepCCAS_TRUE@	$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCCAS_TRUE@	mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@	DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@	$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) # mkid database; awk must set the same nonempty flag that its END clause tests
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+	$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+check: check-am
+all-am: Makefile $(SCRIPTS)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean \
+	clean-checkPROGRAMS clean-generic ctags distclean \
+	distclean-compile distclean-generic distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
+	uninstall-am
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/cachegrind/tests/x86/filter_stderr b/cachegrind/tests/x86/filter_stderr
new file mode 100755
index 0000000..ed65c9f
--- /dev/null
+++ b/cachegrind/tests/x86/filter_stderr
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+# Use the generic Cachegrind stderr filter
+../filter_stderr
diff --git a/cachegrind/tests/x86/fpu-28-108.S b/cachegrind/tests/x86/fpu-28-108.S
new file mode 100644
index 0000000..54b2da6
--- /dev/null
+++ b/cachegrind/tests/x86/fpu-28-108.S
@@ -0,0 +1,26 @@
+/* Test 28 and 108 byte loads and stores.  (Just make sure program
+   runs without any assertion failures from V.) */
+
+/* Useful listing: 
+	gcc -o tests/fpu_28_108 tests/fpu_28_108.S -Wa,-a */
+
+#include "tests/asm.h"
+
+.data                           /* writable scratch area used by the FPU stores below */
+fooble:                         /* 3 x 16 .long zeros = 48 longs (192 bytes) */
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+bar:                            /* label just past fooble; not referenced in this file */
+        
+.text
+.globl VG_SYM_ASM(main)
+VG_SYM_ASM(main):               /* VG_SYM_ASM is presumably defined in tests/asm.h -- confirm */
+        fstsw   fooble          /* store the FPU status word into fooble */
+        fsave   fooble          /* store the full FPU state (presumably the 108-byte case) */
+        frstor  fooble          /* reload the state image just written */
+        fstenv  fooble          /* store the FPU environment (presumably the 28-byte case) */
+        fldenv  fooble          /* reload the environment image just written */
+        movl    $0, %eax        /* main's return value: 0 */
+        ret
+
diff --git a/cachegrind/tests/x86/fpu-28-108.stderr.exp b/cachegrind/tests/x86/fpu-28-108.stderr.exp
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/x86/fpu-28-108.stderr.exp
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/x86/fpu-28-108.vgtest b/cachegrind/tests/x86/fpu-28-108.vgtest
new file mode 100644
index 0000000..b2e138e
--- /dev/null
+++ b/cachegrind/tests/x86/fpu-28-108.vgtest
@@ -0,0 +1,2 @@
+prog: fpu-28-108
+cleanup: rm cachegrind.out.*
diff --git a/cachegrind/tests/x86/insn_sse2.stdout.exp b/cachegrind/tests/x86/insn_sse2.stdout.exp
new file mode 100644
index 0000000..344df89
--- /dev/null
+++ b/cachegrind/tests/x86/insn_sse2.stdout.exp
@@ -0,0 +1,302 @@
+addpd_1 ... ok
+addpd_2 ... ok
+addsd_1 ... ok
+addsd_2 ... ok
+andpd_1 ... ok
+andpd_2 ... ok
+andnpd_1 ... ok
+andnpd_2 ... ok
+cmpeqpd_1 ... ok
+cmpeqpd_2 ... ok
+cmpltpd_1 ... ok
+cmpltpd_2 ... ok
+cmplepd_1 ... ok
+cmplepd_2 ... ok
+cmpneqpd_1 ... ok
+cmpneqpd_2 ... ok
+cmpnltpd_1 ... ok
+cmpnltpd_2 ... ok
+cmpnlepd_1 ... ok
+cmpnlepd_2 ... ok
+cmpeqsd_1 ... ok
+cmpeqsd_2 ... ok
+cmpltsd_1 ... ok
+cmpltsd_2 ... ok
+cmplesd_1 ... ok
+cmplesd_2 ... ok
+cmpneqsd_1 ... ok
+cmpneqsd_2 ... ok
+cmpnltsd_1 ... ok
+cmpnltsd_2 ... ok
+cmpnlesd_1 ... ok
+cmpnlesd_2 ... ok
+comisd_1 ... ok
+comisd_2 ... ok
+comisd_3 ... ok
+comisd_4 ... ok
+comisd_5 ... ok
+comisd_6 ... ok
+cvtdq2pd_1 ... ok
+cvtdq2pd_2 ... ok
+cvtdq2ps_1 ... ok
+cvtdq2ps_2 ... ok
+cvtpd2dq_1 ... ok
+cvtpd2dq_2 ... ok
+cvtpd2pi_1 ... ok
+cvtpd2pi_2 ... ok
+cvtpd2ps_1 ... ok
+cvtpd2ps_2 ... ok
+cvtpi2pd_1 ... ok
+cvtpi2pd_2 ... ok
+cvtps2dq_1 ... ok
+cvtps2dq_2 ... ok
+cvtps2pd_1 ... ok
+cvtps2pd_2 ... ok
+cvtsd2si_1 ... ok
+cvtsd2si_2 ... ok
+cvtsd2ss_1 ... ok
+cvtsd2ss_2 ... ok
+cvtsi2sd_1 ... ok
+cvtsi2sd_2 ... ok
+cvtss2sd_1 ... ok
+cvtss2sd_2 ... ok
+cvttpd2pi_1 ... ok
+cvttpd2pi_2 ... ok
+cvttpd2dq_1 ... ok
+cvttpd2dq_2 ... ok
+cvttps2dq_1 ... ok
+cvttps2dq_2 ... ok
+cvttsd2si_1 ... ok
+cvttsd2si_2 ... ok
+divpd_1 ... ok
+divpd_2 ... ok
+divsd_1 ... ok
+divsd_2 ... ok
+lfence_1 ... ok
+maxpd_1 ... ok
+maxpd_2 ... ok
+maxsd_1 ... ok
+maxsd_2 ... ok
+mfence_1 ... ok
+minpd_1 ... ok
+minpd_2 ... ok
+minsd_1 ... ok
+minsd_2 ... ok
+movapd_1 ... ok
+movapd_2 ... ok
+movd_1 ... ok
+movd_2 ... ok
+movd_3 ... ok
+movd_4 ... ok
+movdqa_1 ... ok
+movdqa_2 ... ok
+movdqa_3 ... ok
+movdqu_1 ... ok
+movdqu_2 ... ok
+movdqu_3 ... ok
+movdq2q_1 ... ok
+movhpd_1 ... ok
+movhpd_2 ... ok
+movlpd_1 ... ok
+movlpd_2 ... ok
+movmskpd_1 ... ok
+movntdq_1 ... ok
+movnti_1 ... ok
+movntpd_1 ... ok
+movq2dq_1 ... ok
+movsd_1 ... ok
+movsd_2 ... ok
+movsd_3 ... ok
+movupd_1 ... ok
+movupd_2 ... ok
+mulpd_1 ... ok
+mulpd_2 ... ok
+mulsd_1 ... ok
+mulsd_2 ... ok
+orpd_1 ... ok
+orpd_2 ... ok
+packssdw_1 ... ok
+packssdw_2 ... ok
+packsswb_1 ... ok
+packsswb_2 ... ok
+packuswb_1 ... ok
+packuswb_2 ... ok
+paddb_1 ... ok
+paddb_2 ... ok
+paddd_1 ... ok
+paddd_2 ... ok
+paddq_1 ... ok
+paddq_2 ... ok
+paddq_3 ... ok
+paddq_4 ... ok
+paddsb_1 ... ok
+paddsb_2 ... ok
+paddsw_1 ... ok
+paddsw_2 ... ok
+paddusb_1 ... ok
+paddusb_2 ... ok
+paddusw_1 ... ok
+paddusw_2 ... ok
+paddw_1 ... ok
+paddw_2 ... ok
+pand_1 ... ok
+pand_2 ... ok
+pandn_1 ... ok
+pandn_2 ... ok
+pavgb_1 ... ok
+pavgb_2 ... ok
+pavgw_1 ... ok
+pavgw_2 ... ok
+pcmpeqb_1 ... ok
+pcmpeqb_2 ... ok
+pcmpeqd_1 ... ok
+pcmpeqd_2 ... ok
+pcmpeqw_1 ... ok
+pcmpeqw_2 ... ok
+pcmpgtb_1 ... ok
+pcmpgtb_2 ... ok
+pcmpgtd_1 ... ok
+pcmpgtd_2 ... ok
+pcmpgtw_1 ... ok
+pcmpgtw_2 ... ok
+pextrw_1 ... ok
+pextrw_2 ... ok
+pextrw_3 ... ok
+pextrw_4 ... ok
+pextrw_5 ... ok
+pextrw_6 ... ok
+pextrw_7 ... ok
+pextrw_8 ... ok
+pinsrw_1 ... ok
+pinsrw_2 ... ok
+pinsrw_3 ... ok
+pinsrw_4 ... ok
+pinsrw_5 ... ok
+pinsrw_6 ... ok
+pinsrw_7 ... ok
+pinsrw_8 ... ok
+pinsrw_9 ... ok
+pinsrw_10 ... ok
+pinsrw_11 ... ok
+pinsrw_12 ... ok
+pinsrw_13 ... ok
+pinsrw_14 ... ok
+pinsrw_15 ... ok
+pinsrw_16 ... ok
+pmaddwd_1 ... ok
+pmaddwd_2 ... ok
+pmaxsw_1 ... ok
+pmaxsw_2 ... ok
+pmaxub_1 ... ok
+pmaxub_2 ... ok
+pminsw_1 ... ok
+pminsw_2 ... ok
+pminub_1 ... ok
+pminub_2 ... ok
+pmovmskb_1 ... ok
+pmulhuw_1 ... ok
+pmulhuw_2 ... ok
+pmulhw_1 ... ok
+pmulhw_2 ... ok
+pmullw_1 ... ok
+pmullw_2 ... ok
+pmuludq_1 ... ok
+pmuludq_2 ... ok
+pmuludq_3 ... ok
+pmuludq_4 ... ok
+por_1 ... ok
+por_2 ... ok
+psadbw_1 ... ok
+psadbw_2 ... ok
+pshufd_1 ... ok
+pshufd_2 ... ok
+pshufhw_1 ... ok
+pshufhw_2 ... ok
+pshuflw_1 ... ok
+pshuflw_2 ... ok
+pslld_1 ... ok
+pslld_2 ... ok
+pslld_3 ... ok
+pslldq_1 ... ok
+pslldq_2 ... ok
+psllq_1 ... ok
+psllq_2 ... ok
+psllq_3 ... ok
+psllw_1 ... ok
+psllw_2 ... ok
+psllw_3 ... ok
+psrad_1 ... ok
+psrad_2 ... ok
+psrad_3 ... ok
+psraw_1 ... ok
+psraw_2 ... ok
+psraw_3 ... ok
+psrld_1 ... ok
+psrld_2 ... ok
+psrld_3 ... ok
+psrldq_1 ... ok
+psrldq_2 ... ok
+psrlq_1 ... ok
+psrlq_2 ... ok
+psrlq_3 ... ok
+psrlw_1 ... ok
+psrlw_2 ... ok
+psrlw_3 ... ok
+psubb_1 ... ok
+psubb_2 ... ok
+psubd_1 ... ok
+psubd_2 ... ok
+psubq_1 ... ok
+psubq_2 ... ok
+psubq_3 ... ok
+psubq_4 ... ok
+psubsb_1 ... ok
+psubsb_2 ... ok
+psubsw_1 ... ok
+psubsw_2 ... ok
+psubusb_1 ... ok
+psubusb_2 ... ok
+psubusw_1 ... ok
+psubusw_2 ... ok
+psubw_1 ... ok
+psubw_2 ... ok
+punpckhbw_1 ... ok
+punpckhbw_2 ... ok
+punpckhdq_1 ... ok
+punpckhdq_2 ... ok
+punpckhqdq_1 ... ok
+punpckhqdq_2 ... ok
+punpckhwd_1 ... ok
+punpckhwd_2 ... ok
+punpcklbw_1 ... ok
+punpcklbw_2 ... ok
+punpckldq_1 ... ok
+punpckldq_2 ... ok
+punpcklqdq_1 ... ok
+punpcklqdq_2 ... ok
+punpcklwd_1 ... ok
+punpcklwd_2 ... ok
+pxor_1 ... ok
+pxor_2 ... ok
+shufpd_1 ... ok
+shufpd_2 ... ok
+sqrtpd_1 ... ok
+sqrtpd_2 ... ok
+sqrtsd_1 ... ok
+sqrtsd_2 ... ok
+subpd_1 ... ok
+subpd_2 ... ok
+subsd_1 ... ok
+subsd_2 ... ok
+ucomisd_1 ... ok
+ucomisd_2 ... ok
+ucomisd_3 ... ok
+ucomisd_4 ... ok
+ucomisd_5 ... ok
+ucomisd_6 ... ok
+unpckhpd_1 ... ok
+unpckhpd_2 ... ok
+unpcklpd_1 ... ok
+unpcklpd_2 ... ok
+xorpd_1 ... ok
+xorpd_2 ... ok
author	Stephane Marchesin <marchesin@icps.u-strasbg.fr>	2009-05-04 19:05:59 +0200
committer	Stephane Marchesin <marchesin@icps.u-strasbg.fr>	2009-05-04 19:05:59 +0200
commit	6e410b3bb6ff51580897431105aae14591cbf7fb (patch)
tree	f8aeba9352710f10cd6b1d5138c8fc3ece91c8c3 /cachegrind