author    | Chris Wilson <chris@chris-wilson.co.uk> | 2011-02-01 10:53:57 +0000
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2011-02-01 10:56:46 +0000
commit    | 371f87f2d825900484e34a8ec78272b9e62cbc02 (patch)
tree      | 7dea0d2304d2027c13ccf945fb1811cf999d27f0
parent    | 5fc09c82d40113a7ebf67b7dbc374c181d9fafe3 (diff)
Exercise the whole aperture with tiled blits
After full-gtt, gem_tiled_blits doesn't allocate enough objects to force
eviction, so query the total aperture size and size the working set to
exceed it.
Also introduce a similar test that uses fences, rather than the BLT, to
perform the tiling and detiling.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
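
For reference, a minimal sketch of the sizing logic this change introduces, assuming libdrm's drmIoctl() and the i915 GET_APERTURE ioctl; the names query_aperture_size(), working_set_count() and drm_fd are illustrative, while the real helper appears in the diff below as gem_aperture_size():

```c
/*
 * Sketch (assumption: mirrors gem_aperture_size() and the count
 * computation in the diff below; helper names are illustrative).
 */
#include <stdint.h>
#include <xf86drm.h>
#include "i915_drm.h"

static uint64_t query_aperture_size(int drm_fd)
{
	struct drm_i915_gem_get_aperture aperture;

	/* Fall back to 512MiB if the ioctl fails. */
	aperture.aper_size = 512*1024*1024;
	(void)drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
	return aperture.aper_size;
}

static int working_set_count(int drm_fd)
{
	/* 3/2 of the aperture in 1MiB objects, so the working set
	 * cannot fit entirely and eviction is forced... */
	int count = 3 * query_aperture_size(drm_fd) / (1024*1024) / 2;

	/* ...made odd, matching the tests' count computation. */
	count += (count & 1) == 0;
	return count;
}
```

In the tests the count is bounded by the fixed bo[4096] arrays; gem_tiled_blits asserts that bound explicitly.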
-rw-r--r-- | .gitignore                    |   1
-rw-r--r-- | lib/intel_batchbuffer.c       |  31
-rw-r--r-- | lib/intel_batchbuffer.h       |  29
-rw-r--r-- | tests/Makefile.am             |   1
-rw-r--r-- | tests/gem_tiled_blits.c       |  37
-rw-r--r-- | tests/gem_tiled_fence_blits.c | 208
6 files changed, 249 insertions, 58 deletions
diff --git a/.gitignore b/.gitignore
@@ -46,6 +46,7 @@ tests/gem_pwrite
 tests/gem_readwrite
 tests/gem_ringfill
 tests/gem_tiled_blits
+tests/gem_tiled_fence_blits
 tests/gem_tiled_pread
 tests/gem_bad_address
 tests/gem_bad_batch
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 54154692..449d46de 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -46,15 +46,10 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
 		batch->bo = NULL;
 	}
 
-	if (!batch->buffer)
-		batch->buffer = malloc(BATCH_SZ);
-
 	batch->bo = drm_intel_bo_alloc(batch->bufmgr, "batchbuffer",
 				       BATCH_SZ, 4096);
 
-	batch->map = batch->buffer;
-	batch->size = BATCH_SZ;
-	batch->ptr = batch->map;
+	batch->ptr = batch->buffer;
 }
 
 struct intel_batchbuffer *
@@ -72,8 +67,6 @@ intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr, uint32_t devid)
 void
 intel_batchbuffer_free(struct intel_batchbuffer *batch)
 {
-	free (batch->buffer);
-
 	drm_intel_bo_unreference(batch->bo);
 	batch->bo = NULL;
 	free(batch);
@@ -82,7 +75,7 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
 void
 intel_batchbuffer_flush(struct intel_batchbuffer *batch)
 {
-	unsigned int used = batch->ptr - batch->map;
+	unsigned int used = batch->ptr - batch->buffer;
 	int ring;
 	int ret;
 
@@ -93,17 +86,16 @@ intel_batchbuffer_flush(struct intel_batchbuffer *batch)
 	if ((used & 4) == 0) {
 		*(uint32_t *) (batch->ptr) = 0; /* noop */
 		batch->ptr += 4;
-		used = batch->ptr - batch->map;
 	}
 
 	/* Mark the end of the buffer. */
-	*(uint32_t *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
+	*(uint32_t *)(batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
 	batch->ptr += 4;
-	used = batch->ptr - batch->map;
+	used = batch->ptr - batch->buffer;
 
-	drm_intel_bo_subdata(batch->bo, 0, used, batch->buffer);
+	ret = drm_intel_bo_subdata(batch->bo, 0, used, batch->buffer);
+	assert(ret == 0);
 
-	batch->map = NULL;
 	batch->ptr = NULL;
 
 	ring = 0;
@@ -125,12 +117,13 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
 {
 	int ret;
 
-	if (batch->ptr - batch->map > batch->bo->size)
-		printf("bad relocation ptr %p map %p offset %d size %ld\n",
-		       batch->ptr, batch->map, batch->ptr - batch->map,
-		       batch->bo->size);
+	if (batch->ptr - batch->buffer > BATCH_SZ)
+		printf("bad relocation ptr %p map %p offset %d size %d\n",
+		       batch->ptr, batch->buffer,
+		       (int)(batch->ptr - batch->buffer),
+		       BATCH_SZ);
 
-	ret = drm_intel_bo_emit_reloc(batch->bo, batch->ptr - batch->map,
+	ret = drm_intel_bo_emit_reloc(batch->bo, batch->ptr - batch->buffer,
 				      buffer, delta, read_domains, write_domain);
 
 	intel_batchbuffer_emit_dword(batch, buffer->offset + delta);
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 84f75761..a8271766 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -15,18 +15,8 @@ struct intel_batchbuffer
 
 	drm_intel_bo *bo;
 
-	uint8_t *buffer;
-
-	uint8_t *map;
+	uint8_t buffer[BATCH_SZ];
 	uint8_t *ptr;
-
-	/* debug stuff */
-	struct {
-		uint8_t *start_ptr;
-		unsigned int total;
-	} emit;
-
-	unsigned int size;
 };
 
 struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr,
@@ -56,14 +46,13 @@ void intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
 
 static inline int
 intel_batchbuffer_space(struct intel_batchbuffer *batch)
 {
-	return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+	return (BATCH_SZ - BATCH_RESERVED) - (batch->ptr - batch->buffer);
 }
 
 static inline void
 intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, uint32_t dword)
 {
-	assert(batch->map);
 	assert(intel_batchbuffer_space(batch) >= 4);
 	*(uint32_t *) (batch->ptr) = dword;
 	batch->ptr += 4;
@@ -73,7 +62,7 @@ static inline void
 intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
 				unsigned int sz)
 {
-	assert(sz < batch->size - 8);
+	assert(sz < BATCH_SZ - 8);
 	if (intel_batchbuffer_space(batch) < sz)
 		intel_batchbuffer_flush(batch);
 }
@@ -84,9 +73,6 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
 
 #define BEGIN_BATCH(n) do {					\
 	intel_batchbuffer_require_space(batch, (n)*4);		\
-	assert(batch->emit.start_ptr == NULL);			\
-	batch->emit.total = (n) * 4;				\
-	batch->emit.start_ptr = batch->ptr;			\
 } while (0)
 
 #define OUT_BATCH(d) intel_batchbuffer_emit_dword(batch, d)
@@ -98,15 +84,6 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
 } while (0)
 
 #define ADVANCE_BATCH() do {					\
-	unsigned int _n = batch->ptr - batch->emit.start_ptr;	\
-	assert(batch->emit.start_ptr != NULL);			\
-	if (_n != batch->emit.total) {				\
-		fprintf(stderr,					\
-			"ADVANCE_BATCH: %d of %d dwords emitted\n", \
-			_n, batch->emit.total);			\
-		abort();					\
-	}							\
-	batch->emit.start_ptr = NULL;				\
 } while(0)
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 128106b6..b5ca62b2 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -12,6 +12,7 @@ TESTS = getversion \
 	gem_pread_after_blit \
 	gem_tiled_pread \
 	gem_tiled_blits \
+	gem_tiled_fence_blits \
 	gem_largeobject \
 	gem_bad_address \
 	gem_bad_blit \
diff --git a/tests/gem_tiled_blits.c b/tests/gem_tiled_blits.c
index b7f5d49c..95764ece 100644
--- a/tests/gem_tiled_blits.c
+++ b/tests/gem_tiled_blits.c
@@ -61,6 +61,16 @@ static drm_intel_bufmgr *bufmgr;
 struct intel_batchbuffer *batch;
 static int width = 512, height = 512;
 
+static uint64_t
+gem_aperture_size(int fd)
+{
+	struct drm_i915_gem_get_aperture aperture;
+
+	aperture.aper_size = 512*1024*1024;
+	(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
+	return aperture.aper_size;
+}
+
 static drm_intel_bo *
 create_bo(uint32_t start_val)
 {
@@ -79,10 +89,8 @@ create_bo(uint32_t start_val)
 	/* Fill the BO with dwords starting at start_val */
 	drm_intel_bo_map(linear_bo, 1);
 	linear = linear_bo->virtual;
-
-	for (i = 0; i < 1024 * 1024 / 4; i++) {
+	for (i = 0; i < 1024 * 1024 / 4; i++)
 		linear[i] = start_val++;
-	}
 	drm_intel_bo_unmap(linear_bo);
 
 	intel_copy_bo (batch, bo, linear_bo, width, height);
@@ -122,20 +130,23 @@ check_bo(drm_intel_bo *bo, uint32_t start_val)
 
 int main(int argc, char **argv)
 {
-	int fd;
-	int bo_count = 768; /* 768MB of objects */
-	drm_intel_bo *bo[bo_count];
-	uint32_t bo_start_val[bo_count];
+	drm_intel_bo *bo[4096];
+	uint32_t bo_start_val[4096];
 	uint32_t start = 0;
-	int i;
+	int i, fd, count;
 
 	fd = drm_open_any();
 
+	count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+	count += (count & 1) == 0;
+	printf("Using %d 1MiB buffers\n", count);
+	assert(count <= 4096);
+
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
-	for (i = 0; i < bo_count; i++) {
+	for (i = 0; i < count; i++) {
 		bo[i] = create_bo(start);
 		bo_start_val[i] = start;
 
@@ -147,9 +158,9 @@ int main(int argc, char **argv)
 		start += 1024 * 1024 / 4;
 	}
 
-	for (i = 0; i < bo_count * 4; i++) {
-		int src = random() % bo_count;
-		int dst = random() % bo_count;
+	for (i = 0; i < count * 4; i++) {
+		int src = random() % count;
+		int dst = random() % count;
 
 		if (src == dst)
 			continue;
@@ -163,7 +174,7 @@ int main(int argc, char **argv)
 		 */
 	}
 
-	for (i = 0; i < bo_count; i++) {
+	for (i = 0; i < count; i++) {
 		/*
 		printf("check %d\n", i);
 		*/
diff --git a/tests/gem_tiled_fence_blits.c b/tests/gem_tiled_fence_blits.c
new file mode 100644
index 00000000..bc2462f7
--- /dev/null
+++ b/tests/gem_tiled_fence_blits.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright © 2009,2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_tiled_fence_blits.c
+ *
+ * This is a test of doing many tiled blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to catch a couple types of failure;
+ * - Fence management problems on pre-965.
+ * - A17 or L-shaped memory tiling workaround problems in acceleration.
+ *
+ * The model is to fill a collection of 1MB objects in a way that can't trip
+ * over A6 swizzling -- upload data to a non-tiled object, blit to the tiled
+ * object. Then, copy the 1MB objects randomly between each other for a while.
+ * Finally, download their data through linear objects again and see what
+ * resulted.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static int width = 512, height = 512;
+static uint32_t linear[1024*1024/4];
+
+static uint64_t
+gem_aperture_size(int fd)
+{
+	struct drm_i915_gem_get_aperture aperture;
+
+	aperture.aper_size = 512*1024*1024;
+	(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
+	return aperture.aper_size;
+}
+
+static void
+gem_write(int fd, drm_intel_bo *bo, const void *buf, int size)
+{
+	struct drm_i915_gem_pwrite pwrite;
+	int ret;
+
+	pwrite.handle = bo->handle;
+	pwrite.offset = 0;
+	pwrite.size = size;
+	pwrite.data_ptr = (uintptr_t)buf;
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
+	assert(ret == 0);
+}
+
+static void
+gem_read(int fd, drm_intel_bo *bo, void *buf, int size)
+{
+	struct drm_i915_gem_pread pread;
+	int ret;
+
+	pread.handle = bo->handle;
+	pread.offset = 0;
+	pread.size = size;
+	pread.data_ptr = (uintptr_t)buf;
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
+	assert(ret == 0);
+}
+
+static drm_intel_bo *
+create_bo(int fd, uint32_t start_val)
+{
+	drm_intel_bo *bo;
+	uint32_t tiling = I915_TILING_X;
+	int ret, i;
+
+	bo = drm_intel_bo_alloc(bufmgr, "tiled bo", 1024 * 1024, 4096);
+	ret = drm_intel_bo_set_tiling(bo, &tiling, width * 4);
+	assert(ret == 0);
+	assert(tiling == I915_TILING_X);
+
+	/* Fill the BO with dwords starting at start_val */
+	for (i = 0; i < 1024 * 1024 / 4; i++)
+		linear[i] = start_val++;
+
+	gem_write(fd, bo, linear, sizeof(linear));
+
+	return bo;
+}
+
+static void
+check_bo(int fd, drm_intel_bo *bo, uint32_t start_val)
+{
+	int i;
+
+	gem_read(fd, bo, linear, sizeof(linear));
+
+	for (i = 0; i < 1024 * 1024 / 4; i++) {
+		if (linear[i] != start_val) {
+			fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+				"at offset 0x%08x\n",
+				start_val, linear[i], i * 4);
+			abort();
+		}
+		start_val++;
+	}
+}
+
+int main(int argc, char **argv)
+{
+	drm_intel_bo *bo[4096];
+	uint32_t bo_start_val[4096];
+	uint32_t start = 0;
+	int fd, i, count;
+
+	fd = drm_open_any();
+	count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+	count += (count & 1) == 0;
+	printf("Using %d 1MiB buffers\n", count);
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	for (i = 0; i < count; i++) {
+		bo[i] = create_bo(fd, start);
+		bo_start_val[i] = start;
+
+		/*
+		printf("Creating bo %d\n", i);
+		check_bo(bo[i], bo_start_val[i]);
+		*/
+
+		start += 1024 * 1024 / 4;
+	}
+
+	for (i = 0; i < count; i++) {
+		int src = count - i - 1;
+		intel_copy_bo(batch, bo[i], bo[src], width, height);
+		bo_start_val[i] = bo_start_val[src];
+	}
+
+	for (i = 0; i < count * 4; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		if (src == dst)
+			continue;
+
+		intel_copy_bo(batch, bo[dst], bo[src], width, height);
+		bo_start_val[dst] = bo_start_val[src];
+
+		/*
+		check_bo(bo[dst], bo_start_val[dst]);
+		printf("%d: copy bo %d to %d\n", i, src, dst);
+		*/
+	}
+
+	for (i = 0; i < count; i++) {
+		/*
+		printf("check %d\n", i);
+		*/
+		check_bo(fd, bo[i], bo_start_val[i]);
+
+		drm_intel_bo_unreference(bo[i]);
+		bo[i] = NULL;
+	}
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}