summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>2016-09-07 17:32:49 +0100
committerLionel Landwerlin <lionel.g.landwerlin@intel.com>2016-09-21 12:01:06 +0300
commitf2d43b44d7c8168228ca1fe85023dffb3a933dca (patch)
treeba58533ac70d0f18c72758f86636e2b0532524ce
parent09394ee6cfe9df2c99373963794c60678da08b39 (diff)
anv: allocator: correct scratch space for haswell
This reproduces this commit : commit 2213ffdb4bb79856f0556bdf2bfd4bdf57720232 Author: Kenneth Graunke <kenneth@whitecape.org> Date: Mon Jun 6 21:37:34 2016 -0700 i965: Allocate scratch space for the maximum number of compute threads. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r--src/intel/vulkan/anv_allocator.c22
1 files changed, 21 insertions, 1 deletions
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index f694aee92d..a59ad3cd6f 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -926,13 +926,33 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
struct anv_physical_device *physical_device =
&device->instance->physicalDevice;
+
+ /* WaCSScratchSize:hsw
+ *
+ * Haswell's scratch space address calculation appears to be sparse
+ * rather than tightly packed. The Thread ID has bits indicating which
+ * subslice, EU within a subslice, and thread within an EU it is.
+ * There's a maximum of two slices and two subslices, so these can be
+ * stored with a single bit. Even though there are only 10 EUs per
+ * subslice, this is stored in 4 bits, so there's an effective maximum
+ * value of 16 EUs. Similarly, although there are only 7 threads per EU,
+ * this is stored in a 3 bit number, giving an effective maximum value
+ * of 8 threads per EU.
+ *
+ * This means that we need to use 16 * 8 instead of 10 * 7 for the
+ * number of threads per subslice.
+ */
+ const unsigned subslices = MAX2(physical_device->subslice_total, 1);
+ const unsigned scratch_ids_per_subslice =
+ device->info.is_haswell ? 16 * 8 : physical_device->max_cs_threads;
+
uint32_t max_threads[] = {
[MESA_SHADER_VERTEX] = physical_device->max_vs_threads,
[MESA_SHADER_TESS_CTRL] = physical_device->max_hs_threads,
[MESA_SHADER_TESS_EVAL] = physical_device->max_ds_threads,
[MESA_SHADER_GEOMETRY] = physical_device->max_gs_threads,
[MESA_SHADER_FRAGMENT] = physical_device->max_wm_threads,
- [MESA_SHADER_COMPUTE] = physical_device->max_cs_threads,
+ [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslices,
};
size = per_thread_scratch * max_threads[stage];