summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2023-05-11 15:22:44 -0700
committerIan Romanick <ian.d.romanick@intel.com>2023-05-11 15:36:27 -0700
commita6b269b3d83eb7c827cb258c0171f0d36b9a6567 (patch)
treeecad31e26f00bb4b298f1729f33f71ad6aae50a5
parent90c3fd0c835241b73c7d17b7f1efd110fbdf6231 (diff)
WIP: intel/fs: Fix shift counts for 8- and 16-bit typeswip/fix-shift-counts
-rw-r--r--src/intel/compiler/brw_fs.cpp13
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp54
2 files changed, 37 insertions, 30 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 59637d36fb2..bf9faee560b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2655,6 +2655,19 @@ fs_visitor::opt_algebraic()
break;
}
break;
+ case BRW_OPCODE_AND:
+ if (inst->src[1].file != IMM)
+ continue;
+
+ if (inst->src[0].file == IMM) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->sources = 1;
+ inst->src[0].ud &= inst->src[1].ud;
+ inst->src[1] = reg_undef;
+ progress = true;
+ break;
+ }
+ break;
case BRW_OPCODE_OR:
if (inst->src[0].equals(inst->src[1]) ||
inst->src[1].is_zero()) {
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index a6a262379e3..8f45586a24d 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1724,43 +1724,37 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
case nir_op_bitfield_insert:
unreachable("not reached: should have been lowered");
- /* For all shift operations:
+ /* With regards to implicit masking of the shift counts for 8- and 16-bit
+ * types, the PRMs are **incorrect**. They falsely state that on Gen9+ only
+ * the low bits of src1 matching the size of src0 (e.g., 4-bits for W or UW
+ * src0) are used. The Bspec (backed by data from experimentation) state
+ * that 0x3f is used for Q and UQ types, and 0x1f is used for **all** other
+ * types.
*
- * Gen4 - Gen7: After application of source modifiers, the low 5-bits of
- * src1 are used an unsigned value for the shift count.
- *
- * Gen8: As with earlier platforms, but for Q and UQ types on src0, the low
- * 6-bit of src1 are used.
- *
- * Gen9+: The low bits of src1 matching the size of src0 (e.g., 4-bits for
- * W or UW src0).
- *
- * The implication is that the following instruction will produce a
- * different result on Gen9+ than on previous platforms:
- *
- * shr(8) g4<1>UW g12<8,8,1>UW 0x0010UW
- *
- * where Gen9+ will shift by zero, and earlier platforms will shift by 16.
- *
- * This does not seem to be the case. Experimentally, it has been
- * determined that shifts of 16-bit values on Gen8 behave properly. Shifts
- * of 8-bit values on both Gen8 and Gen9 do not. Gen11+ lowers 8-bit
- * values, so those platforms were not tested. No features expose access
- * to 8- or 16-bit types on Gen7 or earlier, so those platforms were not
- * tested either. See
- * https://gitlab.freedesktop.org/mesa/crucible/-/merge_requests/76.
- *
- * This is part of the reason 8-bit values are lowered to 16-bit on all
- * platforms.
+ * The match the behavior expected for the NIR opcodes, explicit masks for
+ * 8- and 16-bit types must be added.
*/
case nir_op_ishl:
- bld.SHL(result, op[0], op[1]);
+ if (nir_dest_bit_size(instr->dest.dest) < 32) {
+ bld.AND(result, op[1], brw_imm_ud(nir_dest_bit_size(instr->dest.dest) - 1));
+ bld.SHL(result, op[0], result);
+ } else
+ bld.SHL(result, op[0], op[1]);
+
break;
case nir_op_ishr:
- bld.ASR(result, op[0], op[1]);
+ if (nir_dest_bit_size(instr->dest.dest) < 32) {
+ bld.AND(result, op[1], brw_imm_ud(nir_dest_bit_size(instr->dest.dest) - 1));
+ bld.ASR(result, op[0], result);
+ } else
+ bld.ASR(result, op[0], op[1]);
break;
case nir_op_ushr:
- bld.SHR(result, op[0], op[1]);
+ if (nir_dest_bit_size(instr->dest.dest) < 32) {
+ bld.AND(result, op[1], brw_imm_ud(nir_dest_bit_size(instr->dest.dest) - 1));
+ bld.SHR(result, op[0], result);
+ } else
+ bld.SHR(result, op[0], op[1]);
break;
case nir_op_urol: