diff options
author | Keith Whitwell <keith@tungstengraphics.com> | 2006-02-09 23:04:44 +0000 |
---|---|---|
committer | Keith Whitwell <keith@tungstengraphics.com> | 2006-02-09 23:04:44 +0000 |
commit | f8246f504b0ec1e112951980d339908992abc6bb (patch) | |
tree | 0de18c521bd946002e028db09542de5e82e76d69 | |
parent | 499458bcdb3879c5a7088be14e0fd75ea29d0be1 (diff) |
Add basic timing for memcpys
-rw-r--r-- | src/mesa/drivers/dri/i915/intel_tex.c | 98 |
1 files changed, 83 insertions, 15 deletions
diff --git a/src/mesa/drivers/dri/i915/intel_tex.c b/src/mesa/drivers/dri/i915/intel_tex.c
index 7d52bfc27d..8f27a76543 100644
--- a/src/mesa/drivers/dri/i915/intel_tex.c
+++ b/src/mesa/drivers/dri/i915/intel_tex.c
@@ -47,8 +47,7 @@ static void intelFreeTextureImageData( GLcontext *ctx,
    struct intel_texture_image *intelImage = intel_texture_image(texImage);
 
    if (intelImage->mt) {
-      intel_miptree_release(intel, intelImage->mt);
-      intelImage->mt = NULL;
+      intel_miptree_release(intel, &intelImage->mt);
    }
 
    if (texImage->Data) {
@@ -67,6 +66,84 @@ static void *do_memcpy( void *dest, const void *src, size_t n )
 }
 
 
+#ifndef __x86_64__
+static unsigned
+fastrdtsc(void)
+{
+   unsigned eax;
+   __asm__ volatile ("\t"
+                     "pushl %%ebx\n\t"
+                     "cpuid\n\t" ".byte 0x0f, 0x31\n\t" "popl %%ebx\n":"=a" (eax)
+                     :"0"(0)
+                     :"ecx", "edx", "cc");
+
+   return eax;
+}
+#else
+static unsigned
+fastrdtsc(void)
+{
+   unsigned eax;
+   __asm__ volatile ("\t"
+                     "cpuid\n\t" ".byte 0x0f, 0x31\n\t" :"=a" (eax)
+                     :"0"(0)
+                     :"ecx", "edx", "ebx", "cc");
+
+   return eax;
+}
+#endif
+
+static unsigned
+time_diff(unsigned t, unsigned t2)
+{
+   return ((t < t2) ? t2 - t : 0xFFFFFFFFU - (t - t2 - 1));
+}
+
+
+/* The system memcpy (at least on ubuntu 5.10) has problems copying
+ * to agp (writecombined) memory from a source which isn't 64-byte
+ * aligned - there is a 4x performance falloff.
+ *
+ * The x86 __memcpy is immune to this but is slightly slower
+ * (10%-ish) than the system memcpy.
+ *
+ * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
+ * isn't much faster than x86_memcpy for agp copies.
+ *
+ * TODO: switch dynamically.
+ */
+static void *do_memcpy( void *dest, const void *src, size_t n )
+{
+   if ( (((unsigned)src) & 63) ||
+        (((unsigned)dest) & 63)) {
+      return __memcpy(dest, src, n);
+   }
+   else
+      return memcpy(dest, src, n);
+}
+
+
+static void *timed_memcpy( void *dest, const void *src, size_t n )
+{
+   void *ret;
+   unsigned t1, t2;
+   double rate;
+
+   if ( (((unsigned)src) & 63) ||
+        (((unsigned)dest) & 63))
+      _mesa_printf("Warning - non-aligned texture copy!\n");
+
+   t1 = fastrdtsc();
+   ret = do_memcpy(dest, src, n);
+   t2 = fastrdtsc();
+
+   rate = time_diff(t1, t2);
+   rate /= (double) n;
+   _mesa_printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate);
+   return ret;
+}
+
+
 void intelInitTextureFuncs(struct dd_function_table * functions)
 {
    functions->ChooseTextureFormat = intelChooseTextureFormat;
@@ -85,17 +162,8 @@ void intelInitTextureFuncs(struct dd_function_table * functions)
    functions->UpdateTexturePalette = 0;
    functions->IsTextureResident = intelIsTextureResident;
 
-   /* The system memcpy (at least on ubuntu 5.10) has problems copying
-    * to agp (writecombined) memory from a source which isn't 64-byte
-    * aligned - there is a 4x performance falloff.
-    *
-    * The x86 __memcpy is immune to this but is slightly slower
-    * (10%-ish) than the system memcpy.
-    *
-    * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
-    * isn't much faster than x86_memcpy for agp copies.
-    *
-    * TODO: switch dynamically.
-    */
-   functions->TextureMemCpy = do_memcpy;
+   if (INTEL_DEBUG & DEBUG_BUFMGR)
+      functions->TextureMemCpy = timed_memcpy;
+   else
+      functions->TextureMemCpy = do_memcpy;
 }