summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Keith Whitwell <keith@tungstengraphics.com> 2006-02-09 23:04:44 +0000
committer Keith Whitwell <keith@tungstengraphics.com> 2006-02-09 23:04:44 +0000
commit f8246f504b0ec1e112951980d339908992abc6bb (patch)
tree 0de18c521bd946002e028db09542de5e82e76d69
parent 499458bcdb3879c5a7088be14e0fd75ea29d0be1 (diff)
Add basic timing for memcpys
-rw-r--r-- src/mesa/drivers/dri/i915/intel_tex.c 98
1 files changed, 83 insertions, 15 deletions
diff --git a/src/mesa/drivers/dri/i915/intel_tex.c b/src/mesa/drivers/dri/i915/intel_tex.c
index 7d52bfc27d..8f27a76543 100644
--- a/src/mesa/drivers/dri/i915/intel_tex.c
+++ b/src/mesa/drivers/dri/i915/intel_tex.c
@@ -47,8 +47,7 @@ static void intelFreeTextureImageData( GLcontext *ctx,
struct intel_texture_image *intelImage = intel_texture_image(texImage);
if (intelImage->mt) {
- intel_miptree_release(intel, intelImage->mt);
- intelImage->mt = NULL;
+ intel_miptree_release(intel, &intelImage->mt);
}
if (texImage->Data) {
@@ -67,6 +66,84 @@ static void *do_memcpy( void *dest, const void *src, size_t n )
}
+#ifndef __x86_64__ /* Variant built when __x86_64__ is not defined (32-bit x86).
+ *
+ * fastrdtsc: return a 32-bit timestamp for cycle-level timing.
+ *
+ * Executes cpuid with eax = 0, then the bytes 0x0f 0x31 (the RDTSC opcode),
+ * and returns eax, i.e. only the low 32 bits of the time-stamp counter.
+ * The high half that RDTSC leaves in edx is discarded, so the returned
+ * value wraps around.
+ *
+ * cpuid is presumably there as a serializing instruction, so that earlier
+ * work completes before the counter is read.
+ *
+ * NOTE(review): there is no "memory" clobber, so the compiler is not told
+ * to keep memory accesses on their side of the asm - confirm this is
+ * acceptable for the timing use.
+ */
+static unsigned
+fastrdtsc(void)
+{
+ unsigned eax;
+ __asm__ volatile ("\t"
+ "pushl %%ebx\n\t" /* cpuid overwrites ebx, which is not in the clobber list here - presumably because ebx is reserved (PIC) on 32-bit builds */
+ "cpuid\n\t" ".byte 0x0f, 0x31\n\t" "popl %%ebx\n":"=a" (eax) /* result comes back in eax */
+ :"0"(0) /* input tied to operand 0: eax = 0 on entry */
+ :"ecx", "edx", "cc"); /* cpuid writes ecx/edx; RDTSC writes edx */
+
+ return eax;
+}
+#else /* x86-64 variant of fastrdtsc.
+ *
+ * Same cpuid + RDTSC sequence and same return value (low 32 bits of the
+ * time-stamp counter), but ebx is declared as a clobber instead of being
+ * saved and restored by hand with pushl/popl.
+ */
+static unsigned
+fastrdtsc(void)
+{
+ unsigned eax;
+ __asm__ volatile ("\t"
+ "cpuid\n\t" ".byte 0x0f, 0x31\n\t" :"=a" (eax) /* 0x0f 0x31 is the RDTSC opcode; result comes back in eax */
+ :"0"(0) /* input tied to operand 0: eax = 0 on entry */
+ :"ecx", "edx", "ebx", "cc"); /* every register cpuid/RDTSC write besides eax */
+
+ return eax;
+}
+#endif
+
+/*
+ * Elapsed ticks between two 32-bit counter samples.
+ *
+ * t is the earlier sample and t2 the later one.  When t2 is ahead of t the
+ * result is the plain difference; otherwise the counter is treated as
+ * having wrapped past 0xFFFFFFFF between the two samples.
+ */
+static unsigned
+time_diff(unsigned t, unsigned t2)
+{
+   /* Common case: no wrap between the two samples. */
+   if (t < t2)
+      return t2 - t;
+
+   /* Wrapped (or equal) samples: measure the distance the long way round. */
+   return 0xFFFFFFFFU - (t - t2 - 1);
+}
+
+
+/* Copy n bytes from src to dest, picking the copy routine by alignment.
+ *
+ * The system memcpy (at least on Ubuntu 5.10) has problems copying
+ * to AGP (write-combined) memory from a source which isn't 64-byte
+ * aligned - there is a 4x performance falloff.
+ *
+ * The x86 __memcpy is immune to this but is slightly slower
+ * (10%-ish) than the system memcpy.
+ *
+ * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
+ * isn't much faster than x86_memcpy for AGP copies.
+ *
+ * Hence: __memcpy is used when either pointer is not 64-byte aligned,
+ * the system memcpy otherwise.  The return value is whatever the chosen
+ * routine returns.
+ *
+ * The addresses are converted to unsigned long rather than unsigned so
+ * that the cast does not narrow the pointer on LP64 builds; only the
+ * low six bits are inspected either way.
+ *
+ * TODO: switch dynamically.
+ */
+static void *do_memcpy( void *dest, const void *src, size_t n )
+{
+   /* Non-zero when src or dest has any of its low six address bits set. */
+   const unsigned long misaligned =
+      ((unsigned long) src | (unsigned long) dest) & 63;
+
+   if (misaligned)
+      return __memcpy(dest, src, n);
+
+   return memcpy(dest, src, n);
+}
+
+
+/* Debug wrapper around do_memcpy() that reports how long the copy took.
+ *
+ * Prints a warning when src or dest is not 64-byte aligned, takes a
+ * fastrdtsc() reading before and after the copy, and prints both
+ * readings together with the elapsed clocks per byte.  Returns the
+ * value returned by do_memcpy().
+ *
+ * A zero-length copy is reported as 0 clocks/byte instead of dividing
+ * by zero.  The addresses are converted to unsigned long rather than
+ * unsigned so the cast does not narrow the pointer on LP64 builds.
+ */
+static void *timed_memcpy( void *dest, const void *src, size_t n )
+{
+   void *ret;
+   unsigned t1, t2;
+   double rate = 0.0;
+
+   if ((((unsigned long) src) & 63) ||
+       (((unsigned long) dest) & 63))
+      _mesa_printf("Warning - non-aligned texture copy!\n");
+
+   t1 = fastrdtsc();
+   ret = do_memcpy(dest, src, n);
+   t2 = fastrdtsc();
+
+   /* Only the low 32 bits of the counter are sampled; time_diff()
+    * accounts for a single wrap between the two readings.
+    */
+   if (n != 0)
+      rate = (double) time_diff(t1, t2) / (double) n;
+
+   _mesa_printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate);
+   return ret;
+}
+
+
void intelInitTextureFuncs(struct dd_function_table * functions)
{
functions->ChooseTextureFormat = intelChooseTextureFormat;
@@ -85,17 +162,8 @@ void intelInitTextureFuncs(struct dd_function_table * functions)
functions->UpdateTexturePalette = 0;
functions->IsTextureResident = intelIsTextureResident;
- /* The system memcpy (at least on ubuntu 5.10) has problems copying
- * to agp (writecombined) memory from a source which isn't 64-byte
- * aligned - there is a 4x performance falloff.
- *
- * The x86 __memcpy is immune to this but is slightly slower
- * (10%-ish) than the system memcpy.
- *
- * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
- * isn't much faster than x86_memcpy for agp copies.
- *
- * TODO: switch dynamically.
- */
- functions->TextureMemCpy = do_memcpy;
+ if (INTEL_DEBUG & DEBUG_BUFMGR)
+ functions->TextureMemCpy = timed_memcpy;
+ else
+ functions->TextureMemCpy = do_memcpy;
}