summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2017-07-22 22:15:22 +0100
committer Chris Wilson <chris@chris-wilson.co.uk> 2018-03-15 10:52:37 +0000
commit d2a334ba21badcbf3dea9d7baa96ed86e7297f84 (patch)
tree b2252c3fef16ab020629a7b496d1e5f10006eb68
parent db79e5d55a4e585b97ee36fee6d7e60b21da73c3 (diff)
gtt-wc
-rw-r--r-- tests/gem_gtt_speed.c 149
1 files changed, 149 insertions, 0 deletions
diff --git a/tests/gem_gtt_speed.c b/tests/gem_gtt_speed.c
index 3d726c4e..ba8ed4f4 100644
--- a/tests/gem_gtt_speed.c
+++ b/tests/gem_gtt_speed.c
@@ -28,6 +28,7 @@
#include "igt.h"
#include "igt_x86.h"
+#include "igt_gt.h"
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
@@ -70,6 +71,84 @@ static void streaming_load(void *src, int len)
*(volatile __m128i *)src = tmp;
}
+
+static uint8_t stage[4096] __attribute((__aligned__(64))); /* 4096-byte, 64-byte-aligned intermediate buffer used by streaming_store() */
+/*
+ * slow_streaming_store - copy len bytes from src to dst without staging.
+ *
+ * Each loop iteration moves 64 bytes: four 16-byte _mm_stream_load_si128()
+ * loads from src, then four _mm_store_si128() stores to dst. Whatever is
+ * left once fewer than 64 bytes remain is copied with memcpy().
+ *
+ * NOTE(review): both intrinsics are documented to require 16-byte aligned
+ * addresses, so src and dst are presumably expected to be 16-byte aligned;
+ * confirm against the callers.
+ * NOTE(review): a negative len skips the loop and is then converted to a
+ * very large size_t by memcpy(); callers must pass len >= 0.
+ */
+static void slow_streaming_store(void *dst, void *src, int len)
+{
+ __m128i *s = src, *d = dst;
+
+ while (len >= 64) {
+ __m128i t0 = _mm_stream_load_si128(s + 0);
+ __m128i t1 = _mm_stream_load_si128(s + 1);
+ __m128i t2 = _mm_stream_load_si128(s + 2);
+ __m128i t3 = _mm_stream_load_si128(s + 3);
+ /* All four loads above are issued before any of the stores below. */
+
+ _mm_store_si128(d + 0, t0);
+ _mm_store_si128(d + 1, t1);
+ _mm_store_si128(d + 2, t2);
+ _mm_store_si128(d + 3, t3);
+
+ len -= 64;
+ s += 4; /* 4 x 16-byte elements = the 64 bytes just copied */
+ d += 4;
+ }
+
+ memcpy(d, s, len); /* tail: len is below 64 at this point */
+}
+
+/*
+ * streaming_store - copy len bytes from src to dst via the stage buffer.
+ *
+ * The copy is split into chunks of at most sizeof(stage) bytes, each a whole
+ * number of 64-byte groups. For every chunk:
+ *   1. fill:  64 bytes at a time, _mm_stream_load_si128() from src and
+ *             _mm_store_si128() into stage;
+ *   2. an lfence separates the two passes;
+ *   3. drain: 64 bytes at a time, _mm_stream_load_si128() from stage and
+ *             _mm_store_si128() into dst.
+ * Any remainder smaller than 64 bytes is copied with memcpy(), matching
+ * slow_streaming_store(). Previously such a tail (and a trailing block of
+ * exactly 64 bytes) was silently dropped.
+ *
+ * Both intrinsics are documented to require 16-byte aligned addresses, so
+ * src and dst must be 16-byte aligned. A len <= 0 copies nothing.
+ */
+static void streaming_store(void *dst, void *src, int len)
+{
+	__m128i *s = src, *d = dst;
+
+	while (len >= 64) {
+		/* Whole 64-byte groups only, capped at the staging buffer size. */
+		int chunk = len > (int)sizeof(stage) ? (int)sizeof(stage) : len & ~63;
+		__m128i *cache = (__m128i *)stage;
+		int slen = chunk;
+
+		/* Fill pass: src -> stage. */
+		while (slen >= 64) {
+			__m128i t0 = _mm_stream_load_si128(s + 0);
+			__m128i t1 = _mm_stream_load_si128(s + 1);
+			__m128i t2 = _mm_stream_load_si128(s + 2);
+			__m128i t3 = _mm_stream_load_si128(s + 3);
+
+			_mm_store_si128(cache + 0, t0);
+			_mm_store_si128(cache + 1, t1);
+			_mm_store_si128(cache + 2, t2);
+			_mm_store_si128(cache + 3, t3);
+
+			s += 4;
+			slen -= 64;
+			cache += 4;
+		}
+
+		cache = (__m128i *)stage;
+		slen = chunk;
+
+		/*
+		 * Kept from the original: fence between the fill and drain
+		 * passes; the "memory" clobber also stops the compiler from
+		 * moving accesses across this point.
+		 */
+		__asm__ __volatile__("lfence" ::: "memory");
+
+		/* Drain pass: stage -> dst. */
+		while (slen >= 64) {
+			__m128i t0 = _mm_stream_load_si128(cache + 0);
+			__m128i t1 = _mm_stream_load_si128(cache + 1);
+			__m128i t2 = _mm_stream_load_si128(cache + 2);
+			__m128i t3 = _mm_stream_load_si128(cache + 3);
+
+			_mm_store_si128(d + 0, t0);
+			_mm_store_si128(d + 1, t1);
+			_mm_store_si128(d + 2, t2);
+			_mm_store_si128(d + 3, t3);
+
+			d += 4;
+			slen -= 64;
+			cache += 4;
+		}
+
+		/* Only account for the bytes that were actually moved. */
+		len -= chunk;
+	}
+
+	/* Tail shorter than one 64-byte group. */
+	if (len > 0)
+		memcpy(d, s, len);
+}
+
static inline unsigned x86_64_features(void)
{
return igt_x86_features();
@@ -84,6 +163,14 @@ static void streaming_load(void *src, int len)
{
igt_assert(!"reached");
}
+static void streaming_store(void *dst, void *src, int len)
+{
+ igt_assert(!"reached"); /* stub for the preprocessor branch without the real streaming_store(); asserts a constant-false expression, so it must never be called */
+}
+static void slow_streaming_store(void *dst, void *src, int len)
+{
+ igt_assert(!"reached"); /* stub for the preprocessor branch without the real slow_streaming_store(); asserts a constant-false expression, so it must never be called */
+}
#endif
int main(int argc, char **argv)
@@ -112,6 +199,7 @@ int main(int argc, char **argv)
igt_info("Detected cpu faatures: %s\n",
igt_x86_features_to_string(cpu, str));
}
+ igt_setup_clflush();
buf = malloc(size);
memset(buf, 0, size);
@@ -187,6 +275,45 @@ int main(int argc, char **argv)
size/1024, elapsed(&start, &end, loop));
}
+ {
+ base = gem_mmap__cpu(fd, handle, 0,
+ size,
+ PROT_READ | PROT_WRITE);
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ ptr = base;
+ x = 0;
+
+ igt_clflush_range(base, size);
+ for (i = 0; i < size/sizeof(*ptr); i++)
+ x += ptr[i];
+
+ /* force overly clever gcc to actually compute x */
+ ptr[0] = x;
+
+ }
+ gettimeofday(&end, NULL);
+ munmap(base, size);
+ igt_info("Time to clflush + read %dk through a cached CPU map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ }
+
+ {
+ base = gem_mmap__cpu(fd, handle, 0,
+ size,
+ PROT_READ | PROT_WRITE);
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++) {
+ igt_clflush_range(base, size);
+ memcpy(buf, base, size);
+
+ }
+ gettimeofday(&end, NULL);
+ munmap(base, size);
+ igt_info("Time to clflush + store %dk from a cached CPU map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+ }
+
/* mmap write */
gettimeofday(&start, NULL);
for (loop = 0; loop < 1000; loop++) {
@@ -339,6 +466,28 @@ int main(int argc, char **argv)
}
igt_info("Time to stream %dk from a cached WC map: %7.3fµs\n",
size/1024, elapsed(&start, &end, loop));
+
+ {
+ uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++)
+ slow_streaming_store(buf, base, size);
+ gettimeofday(&end, NULL);
+ munmap(base, size);
+ }
+ igt_info("Time to (unoptimised) store %dk from a cached WC map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
+
+ {
+ uint32_t *base = gem_mmap__wc(fd, handle, 0, size, PROT_READ | PROT_WRITE);
+ gettimeofday(&start, NULL);
+ for (loop = 0; loop < 1000; loop++)
+ streaming_store(buf, base, size);
+ gettimeofday(&end, NULL);
+ munmap(base, size);
+ }
+ igt_info("Time to (optimised) store %dk from a cached WC map: %7.3fµs\n",
+ size/1024, elapsed(&start, &end, loop));
}
}