summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2016-11-07 10:31:16 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2016-11-07 11:20:16 +0000
commitc425d13827a1aab919aee9b6f9e8056c37d7b2c9 (patch)
tree4d1dc58efccab00084e60e2adcbc84431f9ee4c6
parent3d65fb87982b6ac38b0ecde812d0fd56640c519d (diff)
igt/gem_mmap_gtt: Use SSE4.1 streaming to speed up per-pixel checks
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--tests/gem_mmap_gtt.c139
1 files changed, 110 insertions, 29 deletions
diff --git a/tests/gem_mmap_gtt.c b/tests/gem_mmap_gtt.c
index cb79aeca..801baa01 100644
--- a/tests/gem_mmap_gtt.c
+++ b/tests/gem_mmap_gtt.c
@@ -26,7 +26,6 @@
*/
#define _GNU_SOURCE
-#include "igt.h"
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
@@ -39,10 +38,15 @@
#include <sys/ioctl.h>
#include "drm.h"
+#include "igt.h"
+#include "igt_x86.h"
+
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+
static int OBJECT_SIZE = 16*1024*1024;
static void
@@ -339,6 +343,16 @@ test_coherency(int fd)
static int min_tile_width(uint32_t devid, int tiling)
{
+ if (tiling < 0) {
+ tiling = -tiling;
+
+ if (intel_gen(devid) >= 4)
+ return 4096 - min_tile_width(devid, tiling);
+ else
+ return 1024;
+
+ }
+
if (intel_gen(devid) == 2)
return 128;
else if (tiling == I915_TILING_X)
@@ -351,6 +365,15 @@ static int min_tile_width(uint32_t devid, int tiling)
static int max_tile_width(uint32_t devid, int tiling)
{
+ if (tiling < 0) {
+ tiling = -tiling;
+
+ if (intel_gen(devid) >= 4)
+ return 4096 + min_tile_width(devid, tiling);
+ else
+ return 2048;
+ }
+
if (intel_gen(devid) >= 7)
return 256 << 10;
else if (intel_gen(devid) >= 4)
@@ -443,12 +466,52 @@ test_huge_bo(int fd, int huge, int tiling)
munmap(linear_pattern, PAGE_SIZE);
}
+#if defined(__x86_64__)
+#define MOVNT 512
+
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+
+#include <smmintrin.h>
+__attribute__((noinline))
+static void copy_wc_page(void *dst, const void *src)
+{
+ if (igt_x86_features() & SSE4_1) {
+ __m128i *S = (__m128i *)src;
+ __m128i *D = (__m128i *)dst;
+
+ for (int i = 0; i < PAGE_SIZE/64; i++) {
+ __m128i tmp[4];
+
+ tmp[0] = _mm_stream_load_si128(S++);
+ tmp[1] = _mm_stream_load_si128(S++);
+ tmp[2] = _mm_stream_load_si128(S++);
+ tmp[3] = _mm_stream_load_si128(S++);
+
+ _mm_store_si128(D++, tmp[0]);
+ _mm_store_si128(D++, tmp[1]);
+ _mm_store_si128(D++, tmp[2]);
+ _mm_store_si128(D++, tmp[3]);
+ }
+ } else
+ memcpy(dst, src, PAGE_SIZE);
+}
+
+#pragma GCC pop_options
+
+#else
+static void copy_wc_page(uint32_t *dst, const uint32_t *src)
+{
+ memcpy(dst, src, PAGE_SIZE);
+}
+#endif
+
static void
test_huge_copy(int fd, int huge, int tiling_a, int tiling_b)
{
uint32_t devid = intel_get_drm_devid(fd);
uint64_t huge_object_size, i;
- uint32_t bo, *pattern_a, *pattern_b;
+ uint32_t bo;
char *a, *b;
switch (huge) {
@@ -467,66 +530,76 @@ test_huge_copy(int fd, int huge, int tiling_a, int tiling_b)
}
intel_require_memory(2, huge_object_size, CHECK_RAM);
- pattern_a = malloc(PAGE_SIZE);
- for (i = 0; i < PAGE_SIZE/4; i++)
- pattern_a[i] = i;
-
- pattern_b = malloc(PAGE_SIZE);
- for (i = 0; i < PAGE_SIZE/4; i++)
- pattern_b[i] = ~i;
-
bo = gem_create(fd, huge_object_size);
if (tiling_a)
- igt_require(__gem_set_tiling(fd, bo, tiling_a, min_tile_width(devid, tiling_a)) == 0);
+ igt_require(__gem_set_tiling(fd, bo, abs(tiling_a), min_tile_width(devid, tiling_a)) == 0);
a = __gem_mmap__gtt(fd, bo, huge_object_size, PROT_READ | PROT_WRITE);
igt_require(a);
gem_close(fd, bo);
for (i = 0; i < huge_object_size / PAGE_SIZE; i++) {
- memcpy(a + PAGE_SIZE*i, pattern_a, PAGE_SIZE);
+ uint32_t *ptr = (uint32_t *)(a + PAGE_SIZE*i);
+ for (int j = 0; j < PAGE_SIZE/4; j++)
+ ptr[j] = i + j;
igt_progress("Writing a ", i, huge_object_size / PAGE_SIZE);
}
bo = gem_create(fd, huge_object_size);
if (tiling_b)
- igt_require(__gem_set_tiling(fd, bo, tiling_b, max_tile_width(devid, tiling_b)) == 0);
+ igt_require(__gem_set_tiling(fd, bo, abs(tiling_b), max_tile_width(devid, tiling_b)) == 0);
b = __gem_mmap__gtt(fd, bo, huge_object_size, PROT_READ | PROT_WRITE);
igt_require(b);
gem_close(fd, bo);
for (i = 0; i < huge_object_size / PAGE_SIZE; i++) {
- memcpy(b + PAGE_SIZE*i, pattern_b, PAGE_SIZE);
+ uint32_t *ptr = (uint32_t *)(b + PAGE_SIZE*i);
+ for (int j = 0; j < PAGE_SIZE/4; j++)
+ ptr[j] = ~(i + j);
igt_progress("Writing b ", i, huge_object_size / PAGE_SIZE);
}
for (i = 0; i < huge_object_size / PAGE_SIZE; i++) {
- if (i & 1)
- memcpy(a + i *PAGE_SIZE, b + i*PAGE_SIZE, PAGE_SIZE);
- else
- memcpy(b + i *PAGE_SIZE, a + i*PAGE_SIZE, PAGE_SIZE);
+ uint32_t *A = (uint32_t *)(a + PAGE_SIZE*i);
+ uint32_t *B = (uint32_t *)(b + PAGE_SIZE*i);
+ uint32_t A_tmp[PAGE_SIZE/sizeof(uint32_t)];
+ uint32_t B_tmp[PAGE_SIZE/sizeof(uint32_t)];
+
+ copy_wc_page(A_tmp, A);
+ copy_wc_page(B_tmp, B);
+ for (int j = 0; j < PAGE_SIZE/4; j++)
+ if ((i + j) & 1)
+ A_tmp[j] = B_tmp[j];
+ else
+ B_tmp[j] = A_tmp[j];
+ memcpy(A, A_tmp, PAGE_SIZE);
+ memcpy(B, B_tmp, PAGE_SIZE);
+
igt_progress("Copying a<->b ", i, huge_object_size / PAGE_SIZE);
}
for (i = 0; i < huge_object_size / PAGE_SIZE; i++) {
- if (i & 1)
- igt_assert(memcmp(pattern_b, a + PAGE_SIZE*i, PAGE_SIZE) == 0);
- else
- igt_assert(memcmp(pattern_a, a + PAGE_SIZE*i, PAGE_SIZE) == 0);
+ uint32_t page[PAGE_SIZE/sizeof(uint32_t)];
+ copy_wc_page(page, a + PAGE_SIZE*i);
+ for (int j = 0; j < PAGE_SIZE/sizeof(uint32_t); j++)
+ if ((i + j) & 1)
+ igt_assert_eq_u32(page[j], ~(i + j));
+ else
+ igt_assert_eq_u32(page[j], i + j);
igt_progress("Checking a ", i, huge_object_size / PAGE_SIZE);
}
munmap(a, huge_object_size);
for (i = 0; i < huge_object_size / PAGE_SIZE; i++) {
- if (i & 1)
- igt_assert(memcmp(pattern_b, b + PAGE_SIZE*i, PAGE_SIZE) == 0);
- else
- igt_assert(memcmp(pattern_a, b + PAGE_SIZE*i, PAGE_SIZE) == 0);
+ uint32_t page[PAGE_SIZE/sizeof(uint32_t)];
+ copy_wc_page(page, b + PAGE_SIZE*i);
+ for (int j = 0; j < PAGE_SIZE/sizeof(uint32_t); j++)
+ if ((i + j) & 1)
+ igt_assert_eq_u32(page[j], ~(i + j));
+ else
+ igt_assert_eq_u32(page[j], i + j);
igt_progress("Checking b ", i, huge_object_size / PAGE_SIZE);
}
munmap(b, huge_object_size);
-
- free(pattern_a);
- free(pattern_b);
}
static void
@@ -695,18 +768,26 @@ igt_main
test_huge_copy(fd, -2, I915_TILING_NONE, I915_TILING_NONE);
igt_subtest("basic-small-copy-XY")
test_huge_copy(fd, -2, I915_TILING_X, I915_TILING_Y);
+ igt_subtest("basic-small-copy-odd")
+ test_huge_copy(fd, -2, -I915_TILING_X, -I915_TILING_Y);
igt_subtest("medium-copy")
test_huge_copy(fd, -1, I915_TILING_NONE, I915_TILING_NONE);
igt_subtest("medium-copy-XY")
test_huge_copy(fd, -1, I915_TILING_X, I915_TILING_Y);
+ igt_subtest("medium-copy-odd")
+ test_huge_copy(fd, -1, -I915_TILING_X, -I915_TILING_Y);
igt_subtest("big-copy")
test_huge_copy(fd, 0, I915_TILING_NONE, I915_TILING_NONE);
igt_subtest("big-copy-XY")
test_huge_copy(fd, 0, I915_TILING_X, I915_TILING_Y);
+ igt_subtest("big-copy-odd")
+ test_huge_copy(fd, 0, -I915_TILING_X, -I915_TILING_Y);
igt_subtest("huge-copy")
test_huge_copy(fd, 1, I915_TILING_NONE, I915_TILING_NONE);
igt_subtest("huge-copy-XY")
test_huge_copy(fd, 1, I915_TILING_X, I915_TILING_Y);
+ igt_subtest("huge-copy-odd")
+ test_huge_copy(fd, 1, -I915_TILING_X, -I915_TILING_Y);
igt_fixture
close(fd);