summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeil Roberts <nroberts@igalia.com>2018-05-18 13:39:13 +0200
committerDylan Baker <dylan@pnwbakers.com>2018-07-05 10:00:13 -0700
commit7333112ed0fa5bcd83dfa0d3875255031d72e277 (patch)
treee9c95a0667af06790043ba20de4f2f7ba31f0145
parent83716106b9f37b13277e513d8285de9b2440e779 (diff)
i965: Fix output register sizes when variable ranges are interleaved18.1-proposed
In 6f5abf31466aed this code was fixed to calculate the maximum size of an attribute in a seperate pass and then allocate the registers to that size. However this wasn’t taking into account ranges that overlap but don’t have the same starting location. For example: layout(location = 0, component = 0) out float a[4]; layout(location = 2, component = 1) out float b[4]; Previously, if ‘a’ was processed first then it would allocate a register of size 4 for location 0 and it wouldn’t allocate another register for location 2 because it would already be covered by the range of 0. Then if something tries to write to b[2] it would try to write past the end of the register allocated for ‘a’ and it would hit an assert. This patch changes it to scan for any overlapping ranges that start within each range to calculate the maximum extent and allocate that instead. Fixed Piglit’s arb_enhanced_layouts/execution/component-layout/ vs-fs-array-interleave-range.shader_test Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> Fixes: 6f5abf31466 "i965: Fix output register sizes when multiple variables share a slot." (cherry picked from commit 2d5ddbe960f7c62a8f00d5e800925865f115970f)
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp25
1 files changed, 18 insertions, 7 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 9698a0111e..47e26f8ec4 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -67,14 +67,25 @@ fs_visitor::nir_setup_outputs()
vec4s[loc] = MAX2(vec4s[loc], var_vec4s);
}
- nir_foreach_variable(var, &nir->outputs) {
- const int loc = var->data.driver_location;
- if (outputs[loc].file == BAD_FILE) {
- fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s[loc]);
- for (unsigned i = 0; i < vec4s[loc]; i++) {
- outputs[loc + i] = offset(reg, bld, 4 * i);
- }
+ for (unsigned loc = 0; loc < ARRAY_SIZE(vec4s);) {
+ if (vec4s[loc] == 0) {
+ loc++;
+ continue;
}
+
+ unsigned reg_size = vec4s[loc];
+
+ /* Check if there are any ranges that start within this range and extend
+ * past it. If so, include them in this allocation.
+ */
+ for (unsigned i = 1; i < reg_size; i++)
+ reg_size = MAX2(vec4s[i + loc] + i, reg_size);
+
+ fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * reg_size);
+ for (unsigned i = 0; i < reg_size; i++)
+ outputs[loc + i] = offset(reg, bld, 4 * i);
+
+ loc += reg_size;
}
}