summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2014-05-08 11:56:56 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2014-05-08 12:24:09 +0100
commit: 66d5f092d46120d97a0408dcd8fea0c0e086d7a8 (patch)
tree: 731ac45d8ff238d1197ee32c6f9f838e39a194aa
parent: e46ff3f8c25957d641420fef4d680d48ce0a365f (diff)
igt/gem_render_tiled_blits: Speed up by using the GPU to detile
Avoid accessing via the slow GTT to read back and compare the contents of each bo against expected results. It is much faster, on llc at least, to detile using the GPU and then copy to system memory for the compare.

Before:

IVB: time sudo ./gem_render_tiled_blits
IGT-Version: 1.6-ge46ff3f (x86_64) (Linux: 3.15.0-rc3+ x86_64)
Using 3072 1MiB buffers
Verifying initialisation...
Cyclic blits, forward...
Cyclic blits, backward...
Random blits...

real 6m26.005s
user 6m19.234s
sys 0m2.414s

PNV: time sudo ./gem_render_tiled_blits
IGT-Version: 1.6-g8556f8a (i686) (Linux: 3.15.0-rc2+ i686)
Using 768 1MiB buffers
Verifying initialisation...
Cyclic blits, forward...
Cyclic blits, backward...
Random blits...

real 1m45.431s
user 1m34.960s
sys 0m4.624s

Using pread:

IVB: time sudo ./gem_render_tiled_blits
IGT-Version: 1.6-ge46ff3f (x86_64) (Linux: 3.15.0-rc3+ x86_64)
Using 3072 1MiB buffers
Verifying initialisation...
Cyclic blits, forward...
Cyclic blits, backward...
Random blits...

real 0m14.717s
user 0m3.699s
sys 0m3.192s

Using snoop:

IVB: time sudo ./gem_render_tiled_blits
IGT-Version: 1.6-ge46ff3f (x86_64) (Linux: 3.15.0-rc3+ x86_64)
Using 3072 1MiB buffers
Using a snoop linear buffer for comparisons
Verifying initialisation...
Cyclic blits, forward...
Cyclic blits, backward...
Random blits...

real 0m13.774s
user 0m3.900s
sys 0m2.089s

PNV: time sudo ./gem_render_tiled_blits
IGT-Version: 1.6-g8556f8a (i686) (Linux: 3.15.0-rc2+ i686)
Using 768 1MiB buffers
Using a snoop linear buffer for comparisons
Verifying initialisation...
Cyclic blits, forward...
Cyclic blits, backward...
Random blits...

real 0m20.831s
user 0m4.384s
sys 0m5.032s

So roughly 10-30x faster depending on platform.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78244
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- tests/gem_render_tiled_blits.c 50
1 files changed, 40 insertions, 10 deletions
diff --git a/tests/gem_render_tiled_blits.c b/tests/gem_render_tiled_blits.c
index 05c71b2a..a3bfd00f 100644
--- a/tests/gem_render_tiled_blits.c
+++ b/tests/gem_render_tiled_blits.c
@@ -60,15 +60,30 @@
#define SIZE (HEIGHT*STRIDE)
static igt_render_copyfunc_t render_copy;
+static drm_intel_bo *linear;
+static uint32_t data[WIDTH*HEIGHT];
+static int snoop;
static void
-check_bo(drm_intel_bo *bo, uint32_t val)
+check_bo(struct intel_batchbuffer *batch, struct igt_buf *buf, uint32_t val)
{
+ struct igt_buf tmp;
uint32_t *ptr;
int i;
- do_or_die(drm_intel_gem_bo_map_gtt(bo));
- ptr = bo->virtual;
+ tmp.bo = linear;
+ tmp.stride = STRIDE;
+ tmp.tiling = I915_TILING_NONE;
+ tmp.size = SIZE;
+
+ render_copy(batch, NULL, buf, 0, 0, WIDTH, HEIGHT, &tmp, 0, 0);
+ if (snoop) {
+ do_or_die(dri_bo_map(linear, 0));
+ ptr = linear->virtual;
+ } else {
+ do_or_die(drm_intel_bo_get_subdata(linear, 0, sizeof(data), data));
+ ptr = data;
+ }
for (i = 0; i < WIDTH*HEIGHT; i++) {
if (ptr[i] != val) {
fprintf(stderr, "Expected 0x%08x, found 0x%08x "
@@ -78,7 +93,8 @@ check_bo(drm_intel_bo *bo, uint32_t val)
}
val++;
}
- drm_intel_gem_bo_unmap_gtt(bo);
+ if (ptr != data)
+ dri_bo_unmap(linear);
}
int main(int argc, char **argv)
@@ -89,22 +105,30 @@ int main(int argc, char **argv)
struct igt_buf *buf;
uint32_t start = 0;
int i, j, fd, count;
+ uint32_t devid;
igt_simple_init();
igt_skip_on_simulation();
fd = drm_open_any();
+ devid = intel_get_drm_devid(fd);
- render_copy = igt_get_render_copyfunc(intel_get_drm_devid(fd));
+ render_copy = igt_get_render_copyfunc(devid);
if (render_copy == NULL) {
printf("no render-copy function, doing nothing\n");
return 77;
}
+ snoop = 1;
+ if (IS_GEN2(devid)) /* chipset only handles cached -> uncached */
+ snoop = 0;
+ if (IS_BROADWATER(devid) || IS_CRESTLINE(devid)) /* snafu */
+ snoop = 0;
+
bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
drm_intel_bufmgr_gem_set_vma_cache_size(bufmgr, 32);
- batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+ batch = intel_batchbuffer_alloc(bufmgr, devid);
count = 0;
if (argc > 1)
@@ -123,6 +147,12 @@ int main(int argc, char **argv)
printf("Using %d 1MiB buffers\n", count);
+ linear = drm_intel_bo_alloc(bufmgr, "linear", WIDTH*HEIGHT*4, 0);
+ if (snoop) {
+ gem_set_caching(fd, linear->handle, 1);
+ printf("Using a snoop linear buffer for comparisons\n");
+ }
+
buf = malloc(sizeof(*buf)*count);
start_val = malloc(sizeof(*start_val)*count);
@@ -149,7 +179,7 @@ int main(int argc, char **argv)
printf("Verifying initialisation...\n");
for (i = 0; i < count; i++)
- check_bo(buf[i].bo, start_val[i]);
+ check_bo(batch, &buf[i], start_val[i]);
printf("Cyclic blits, forward...\n");
for (i = 0; i < count * 4; i++) {
@@ -160,7 +190,7 @@ int main(int argc, char **argv)
start_val[dst] = start_val[src];
}
for (i = 0; i < count; i++)
- check_bo(buf[i].bo, start_val[i]);
+ check_bo(batch, &buf[i], start_val[i]);
printf("Cyclic blits, backward...\n");
for (i = 0; i < count * 4; i++) {
@@ -171,7 +201,7 @@ int main(int argc, char **argv)
start_val[dst] = start_val[src];
}
for (i = 0; i < count; i++)
- check_bo(buf[i].bo, start_val[i]);
+ check_bo(batch, &buf[i], start_val[i]);
printf("Random blits...\n");
for (i = 0; i < count * 4; i++) {
@@ -185,7 +215,7 @@ int main(int argc, char **argv)
start_val[dst] = start_val[src];
}
for (i = 0; i < count; i++)
- check_bo(buf[i].bo, start_val[i]);
+ check_bo(batch, &buf[i], start_val[i]);
return 0;
}