diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2017-07-22 22:15:22 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2018-03-15 10:52:37 +0000 |
commit | d2a334ba21badcbf3dea9d7baa96ed86e7297f84 (patch) | |
tree | b2252c3fef16ab020629a7b496d1e5f10006eb68 | |
parent | db79e5d55a4e585b97ee36fee6d7e60b21da73c3 (diff) |
gtt-wc
-rw-r--r-- | tests/gem_gtt_speed.c | 173 |
1 files changed, 173 insertions, 0 deletions
diff --git a/tests/gem_gtt_speed.c b/tests/gem_gtt_speed.c
index 3d726c4e..ba8ed4f4 100644
--- a/tests/gem_gtt_speed.c
+++ b/tests/gem_gtt_speed.c
@@ -28,6 +28,7 @@
 
 #include "igt.h"
 #include "igt_x86.h"
+#include "igt_gt.h"
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -70,6 +71,108 @@ static void streaming_load(void *src, int len)
 
 	*(volatile __m128i *)src = tmp;
 }
+
+/* 4KiB bounce buffer for streaming_store(), aligned to 64 bytes. */
+static uint8_t stage[4096] __attribute((__aligned__(64)));
+
+/*
+ * Copy len bytes from src straight to dst: every 64-byte block is read with
+ * four 16-byte streaming loads and written with four aligned stores, and a
+ * tail shorter than 64 bytes is handed to memcpy(). Both pointers need to be
+ * 16-byte aligned for the SSE accesses.
+ */
+static void slow_streaming_store(void *dst, void *src, int len)
+{
+	__m128i *s = src, *d = dst;
+
+	while (len >= 64) {
+		__m128i t0 = _mm_stream_load_si128(s + 0);
+		__m128i t1 = _mm_stream_load_si128(s + 1);
+		__m128i t2 = _mm_stream_load_si128(s + 2);
+		__m128i t3 = _mm_stream_load_si128(s + 3);
+
+		_mm_store_si128(d + 0, t0);
+		_mm_store_si128(d + 1, t1);
+		_mm_store_si128(d + 2, t2);
+		_mm_store_si128(d + 3, t3);
+
+		len -= 64;
+		s += 4;
+		d += 4;
+	}
+
+	memcpy(d, s, len);
+}
+
+/*
+ * Copy len bytes from src to dst through stage[]: up to sizeof(stage) bytes
+ * are first pulled into the bounce buffer with streaming loads and only then
+ * written out to dst, so reads of src and writes to dst happen in separate
+ * passes. Whole 64-byte blocks take that path; a tail shorter than 64 bytes
+ * is handed to memcpy(), matching slow_streaming_store(). Both pointers need
+ * to be 16-byte aligned for the SSE accesses.
+ */
+static void streaming_store(void *dst, void *src, int len)
+{
+	while (len >= 64) {
+		/* Whole 64-byte blocks for this round, at most one stage[] full. */
+		int chunk = (len > (int)sizeof(stage) ?
+			     (int)sizeof(stage) : len) & ~63;
+		__m128i *cache = (__m128i *)stage;
+		int slen = chunk;
+
+		/* Pass 1: src -> stage[] */
+		while (slen >= 64) {
+			__m128i *s = src;
+
+			__m128i t0 = _mm_stream_load_si128(s + 0);
+			__m128i t1 = _mm_stream_load_si128(s + 1);
+			__m128i t2 = _mm_stream_load_si128(s + 2);
+			__m128i t3 = _mm_stream_load_si128(s + 3);
+
+			_mm_store_si128(cache + 0, t0);
+			_mm_store_si128(cache + 1, t1);
+			_mm_store_si128(cache + 2, t2);
+			_mm_store_si128(cache + 3, t3);
+
+			src += 64;
+			slen -= 64;
+			cache += 4;
+		}
+
+		cache = (__m128i *)stage;
+		slen = chunk;
+
+		/* NOTE(review): fence between the passes; presumably orders the staged reads after the fill - confirm. */
+		__asm__ __volatile__("lfence" ::: "memory");
+
+		/* Pass 2: stage[] -> dst */
+		while (slen >= 64) {
+			__m128i *d = dst;
+
+			__m128i t0 = _mm_stream_load_si128(cache + 0);
+			__m128i t1 = _mm_stream_load_si128(cache + 1);
+			__m128i t2 = _mm_stream_load_si128(cache + 2);
+			__m128i t3 = _mm_stream_load_si128(cache + 3);
+
+			_mm_store_si128(d + 0, t0);
+			_mm_store_si128(d + 1, t1);
+			_mm_store_si128(d + 2, t2);
+			_mm_store_si128(d + 3, t3);
+
+			dst += 64;
+			slen -= 64;
+			cache += 4;
+		}
+
+		len -= chunk;
+	}
+
+	/* Fewer than 64 bytes left: too short for the block loops above. */
+	if (len > 0)
+		memcpy(dst, src, len);
+}
+
 static inline unsigned x86_64_features(void)
 {
 	return igt_x86_features();
@@ -84,6 +187,14 @@ static void streaming_load(void *src, int len)
 {
 	igt_assert(!"reached");
 }
+static void streaming_store(void *dst, void *src, int len)
+{
+	igt_assert(!"reached");
+}
+static void slow_streaming_store(void *dst, void *src, int len)
+{
+	igt_assert(!"reached");
+}
 #endif
 
 int main(int argc, char **argv)
@@ -112,6 +223,7 @@ int main(int argc, char **argv)
 		igt_info("Detected cpu faatures: %s\n",
 			 igt_x86_features_to_string(cpu, str));
 	}
+	igt_setup_clflush();
 
 	buf = malloc(size);
 	memset(buf, 0, size);
@@ -187,6 +299,45 @@ int main(int argc, char **argv)
 				 size/1024, elapsed(&start, &end, loop));
 		}
 
+		{
+			base = gem_mmap__cpu(fd, handle, 0,
+					     size,
+					     PROT_READ | PROT_WRITE);
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++) {
+				ptr = base;
+				x = 0;
+
+				igt_clflush_range(base, size);
+				for (i = 0; i < size/sizeof(*ptr); i++)
+					x += ptr[i];
+
+				/* force overly clever gcc to actually compute x */
+				ptr[0] = x;
+
+			}
+			gettimeofday(&end, NULL);
+			munmap(base, size);
+			igt_info("Time to clflush + read %dk through a cached CPU map: %7.3fµs\n",
+				 size/1024, elapsed(&start, &end, loop));
+		}
+
+		{
+			base = gem_mmap__cpu(fd, handle, 0,
+					     size,
+					     PROT_READ | PROT_WRITE);
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++) {
+				igt_clflush_range(base, size);
+				memcpy(buf, base, size);
+
+			}
+			gettimeofday(&end, NULL);
+			munmap(base, size);
+			igt_info("Time to clflush + store %dk from a cached CPU map: %7.3fµs\n",
+				 size/1024, elapsed(&start, &end, loop));
+		}
+
 		/* mmap write */
 		gettimeofday(&start, NULL);
 		for (loop = 0; loop < 1000; loop++) {
@@ -339,6 +490,28 @@ int main(int argc, char **argv)
 			}
 			igt_info("Time to stream %dk from a cached WC map: %7.3fµs\n",
 				 size/1024, elapsed(&start, &end, loop));
+
+			{
+				uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+				gettimeofday(&start, NULL);
+				for (loop = 0; loop < 1000; loop++)
+					slow_streaming_store(buf, base, size);
+				gettimeofday(&end, NULL);
+				munmap(base, size);
+			}
+			igt_info("Time to (unoptimised) store %dk from a cached WC map: %7.3fµs\n",
+				 size/1024, elapsed(&start, &end, loop));
+
+			{
+				uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+				gettimeofday(&start, NULL);
+				for (loop = 0; loop < 1000; loop++)
+					streaming_store(buf, base, size);
+				gettimeofday(&end, NULL);
+				munmap(base, size);
+			}
+			igt_info("Time to (optimised) store %dk from a cached WC map: %7.3fµs\n",
+				 size/1024, elapsed(&start, &end, loop));
 		}
 	}
 