summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeil Roberts <neil@linux.intel.com>2015-05-28 15:27:31 +0100
committerEmil Velikov <emil.l.velikov@gmail.com>2015-06-12 16:22:51 +0100
commit9b8d49278183abb05861490f99a91fe6e433932b (patch)
tree49339cab1ba726bf75063058fad76ddf1ef4cd28
parentef9020570448b520ecdf7d088003130e36211589 (diff)
i965: Don't use a temporary when generating an indirect sample
Previously when generating the send instruction for a sample instruction with an indirect sampler it would use the destination register as a temporary store. This breaks when used in combination with the opt_sampler_eot optimisation because that forces the destination to be null. This patch fixes that by avoiding the temp register altogether. The reason the temporary register was needed was because it was trying to ensure the binding table index doesn't overflow a byte by and'ing it with 0xff. The result is then or'd with samper_index<<8. This patch instead just and's the whole thing by 0xfff. This will ensure that a bogus sampler index won't overflow into the rest of the message descriptor but unlike the previous code it won't ensure that the binding table index doesn't overflow into the sampler index. It doesn't seem like that should matter very much though because if the shader is generating a bogus sampler index then it's going to just get garbage out either way. Instead of doing sampler_index<<8|(sampler_index+base_table_index) the new code avoids one operation by doing sampler_index*0x101+base_table_index which should be equivalent. However if we wanted to avoid the multiply for some reason we could do this by adding an extra or instruction still without needing the temporary register. This fixes a number of Piglit tests on Skylake that were using indirect samplers such as: spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple Reviewed-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Chris Forbes <chrisf@ijw.co.nz> Acked-by: Ben Widawsky <ben@bwidawsk.net> Tested-by: Anuj Phogat <anuj.phogat@gmail.com> (cherry picked from commit 6c846dc57b1d6f3e015a604dba1976f96c4be9e9)
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp17
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp17
2 files changed, 8 insertions, 26 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 0be0f86655..ea46b1a191 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
brw_mark_surface_used(prog_data, sampler + base_binding_table_index);
} else {
/* Non-const sampler index */
- /* Note: this clobbers `dst` as a temporary before emitting the send */
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
- struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
-
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- /* Some care required: `sampler` and `temp` may alias:
- * addr = sampler & 0xff
- * temp = (sampler << 8) & 0xf00
- * addr = addr | temp
- */
- brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
- brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
- brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
- brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
- brw_OR(p, addr, addr, temp);
+ /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */
+ brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
+ brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+ brw_AND(p, addr, addr, brw_imm_ud(0xfff));
brw_pop_insn_state(p);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index ef77b8df05..9699607ed2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -398,27 +398,18 @@ vec4_generator::generate_tex(vec4_instruction *inst,
brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index);
} else {
/* Non-constant sampler index. */
- /* Note: this clobbers `dst` as a temporary before emitting the send */
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
- struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
-
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- /* Some care required: `sampler` and `temp` may alias:
- * addr = sampler & 0xff
- * temp = (sampler << 8) & 0xf00
- * addr = addr | temp
- */
- brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
- brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
- brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
- brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
- brw_OR(p, addr, addr, temp);
+ /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */
+ brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
+ brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+ brw_AND(p, addr, addr, brw_imm_ud(0xfff));
brw_pop_insn_state(p);