diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2017-07-22 22:15:22 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2017-08-11 12:00:15 +0100 |
commit | 4838e885f032c8b784fa8237ba26a8b8495a1333 (patch) | |
tree | 654912ac0383189dcdb92076c98197aed9e6df3d | |
parent | d16a400fc8b0b8cb1ed4df02300aa49887765c93 (diff) |
gtt-wc
-rw-r--r-- | tests/gem_gtt_speed.c | 141 |
1 file changed, 141 insertions, 0 deletions
diff --git a/tests/gem_gtt_speed.c b/tests/gem_gtt_speed.c
index 3d726c4e..08860fd4 100644
--- a/tests/gem_gtt_speed.c
+++ b/tests/gem_gtt_speed.c
@@ -28,6 +28,7 @@
 
 #include "igt.h"
 #include "igt_x86.h"
+#include "igt_gt.h"
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -70,6 +71,84 @@ static void streaming_load(void *src, int len)
 
 	*(volatile __m128i *)src = tmp;
 }
+
+static uint8_t stage[4096] __attribute((__aligned__(64))); /* 4096-byte, 64-byte-aligned staging buffer used by streaming_store() */
+
+static void slow_streaming_store(void *dst, void *src, int len) /* copy len bytes from src to dst, 64 bytes per iteration, remainder via memcpy */
+{
+	__m128i *s = src, *d = dst;
+
+	while (len >= 64) { /* each iteration: four 16-byte _mm_stream_load_si128 reads, then four _mm_store_si128 writes */
+		__m128i t0 = _mm_stream_load_si128(s + 0);
+		__m128i t1 = _mm_stream_load_si128(s + 1);
+		__m128i t2 = _mm_stream_load_si128(s + 2);
+		__m128i t3 = _mm_stream_load_si128(s + 3);
+
+
+		_mm_store_si128(d + 0, t0);
+		_mm_store_si128(d + 1, t1);
+		_mm_store_si128(d + 2, t2);
+		_mm_store_si128(d + 3, t3);
+
+		len -= 64;
+		s += 4; /* 4 x 16 bytes = 64 bytes */
+		d += 4;
+	}
+
+	memcpy(d, s, len); /* copy whatever is left after the 64-byte iterations */
+}
+
+static void streaming_store(void *dst, void *src, int len) /* copy src to dst in chunks of up to 4096 bytes, bounced through stage[] */
+{
+	while (len > 64) { /* NOTE(review): skipped entirely when len <= 64, and a sub-64-byte tail is never copied */
+		__m128i *cache = (__m128i *)stage;
+		int slen = len > 4096 ? 4096 : len; /* chunk size: at most the size of stage[] */
+
+		while (slen >= 64) { /* pass 1: read the chunk from src into the staging buffer */
+			__m128i *s = src;
+
+			__m128i t0 = _mm_stream_load_si128(s + 0);
+			__m128i t1 = _mm_stream_load_si128(s + 1);
+			__m128i t2 = _mm_stream_load_si128(s + 2);
+			__m128i t3 = _mm_stream_load_si128(s + 3);
+
+			_mm_store_si128(cache + 0, t0);
+			_mm_store_si128(cache + 1, t1);
+			_mm_store_si128(cache + 2, t2);
+			_mm_store_si128(cache + 3, t3);
+
+			src += 64; /* arithmetic on a void pointer (GNU C extension): advances by 64 bytes */
+			slen -= 64;
+			cache += 4;
+		}
+
+		cache = (__m128i *)stage; /* rewind to the start of the staging buffer for pass 2 */
+		slen = len > 4096 ? 4096 : len;
+
+		__asm__ __volatile__("lfence" ::: "memory"); /* lfence instruction plus compiler memory clobber between the two passes */
+
+		while (slen >= 64) { /* pass 2: read the staged chunk back and store it to dst */
+			__m128i *d = dst;
+
+			__m128i t0 = _mm_stream_load_si128(cache + 0);
+			__m128i t1 = _mm_stream_load_si128(cache + 1);
+			__m128i t2 = _mm_stream_load_si128(cache + 2);
+			__m128i t3 = _mm_stream_load_si128(cache + 3);
+
+			_mm_store_si128(d + 0, t0);
+			_mm_store_si128(d + 1, t1);
+			_mm_store_si128(d + 2, t2);
+			_mm_store_si128(d + 3, t3);
+
+			dst += 64;
+			slen -= 64;
+			cache += 4;
+		}
+
+		len -= len > 4096 ? 4096 : len; /* consume the whole chunk, including any part below 64 bytes that was not copied */
+	}
+}
+
 static inline unsigned x86_64_features(void)
 {
 	return igt_x86_features();
@@ -112,6 +191,7 @@ int main(int argc, char **argv)
 		igt_info("Detected cpu faatures: %s\n",
 			 igt_x86_features_to_string(cpu, str));
 	}
+	igt_setup_clflush();
 
 	buf = malloc(size);
 	memset(buf, 0, size);
@@ -187,6 +267,45 @@ int main(int argc, char **argv)
 			 size/1024, elapsed(&start, &end, loop));
 	}
 
+	{
+		base = gem_mmap__cpu(fd, handle, 0,
+				     size,
+				     PROT_READ | PROT_WRITE);
+		gettimeofday(&start, NULL);
+		for (loop = 0; loop < 1000; loop++) {
+			ptr = base;
+			x = 0;
+
+			igt_clflush_range(base, size);
+			for (i = 0; i < size/sizeof(*ptr); i++)
+				x += ptr[i];
+
+			/* force overly clever gcc to actually compute x */
+			ptr[0] = x;
+
+		}
+		gettimeofday(&end, NULL);
+		munmap(base, size);
+		igt_info("Time to clflush + read %dk through a cached CPU map:	%7.3fµs\n",
+			 size/1024, elapsed(&start, &end, loop));
+	}
+
+	{
+		base = gem_mmap__cpu(fd, handle, 0,
+				     size,
+				     PROT_READ | PROT_WRITE);
+		gettimeofday(&start, NULL);
+		for (loop = 0; loop < 1000; loop++) {
+			igt_clflush_range(base, size);
+			memcpy(buf, base, size);
+
+		}
+		gettimeofday(&end, NULL);
+		munmap(base, size);
+		igt_info("Time to clflush + store %dk from a cached CPU map:	%7.3fµs\n",
+			 size/1024, elapsed(&start, &end, loop));
+	}
+
 	/* mmap write */
 	gettimeofday(&start, NULL);
 	for (loop = 0; loop < 1000; loop++) {
@@ -339,6 +458,28 @@ int main(int argc, char **argv)
 			}
 			igt_info("Time to stream %dk from a cached WC map:	%7.3fµs\n",
 				 size/1024, elapsed(&start, &end, loop));
+
+			{
+				uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+				gettimeofday(&start, NULL);
+				for (loop = 0; loop < 1000; loop++)
+					slow_streaming_store(buf, base, size);
+				gettimeofday(&end, NULL);
+				munmap(base, size);
+			}
+			igt_info("Time to (unoptimised) store %dk from a cached WC map:	%7.3fµs\n",
+				 size/1024, elapsed(&start, &end, loop));
+
+			{
+				uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+				gettimeofday(&start, NULL);
+				for (loop = 0; loop < 1000; loop++)
+					streaming_store(buf, base, size);
+				gettimeofday(&end, NULL);
+				munmap(base, size);
+			}
+			igt_info("Time to (optimised) store %dk from a cached WC map:	%7.3fµs\n",
+				 size/1024, elapsed(&start, &end, loop));
 		}
 
 	}
|