WIP: intel/fs: Fix shift counts for 8- and 16-bit typeswip/fix-shift-counts

author: Ian Romanick <ian.d.romanick@intel.com> 2023-05-11 15:22:44 -0700
committer: Ian Romanick <ian.d.romanick@intel.com> 2023-05-11 15:36:27 -0700
commit: a6b269b3d83eb7c827cb258c0171f0d36b9a6567 (patch)
tree: ecad31e26f00bb4b298f1729f33f71ad6aae50a5
parent: 90c3fd0c835241b73c7d17b7f1efd110fbdf6231 (diff)
2 files changed, 37 insertions, 30 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 59637d36fb2..bf9faee560b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2655,6 +2655,19 @@ fs_visitor::opt_algebraic()
             break;
          }
          break;
+      case BRW_OPCODE_AND:
+         if (inst->src[1].file != IMM)
+            continue;
+
+         if (inst->src[0].file == IMM) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->sources = 1;
+            inst->src[0].ud &= inst->src[1].ud;
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
+         break;
       case BRW_OPCODE_OR:
          if (inst->src[0].equals(inst->src[1]) ||
              inst->src[1].is_zero()) {
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index a6a262379e3..8f45586a24d 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1724,43 +1724,37 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
    case nir_op_bitfield_insert:
       unreachable("not reached: should have been lowered");
 
-   /* For all shift operations:
+   /* With regards to implicit masking of the shift counts for 8- and 16-bit
+    * types, the PRMs are **incorrect**. They falsely state that on Gen9+ only
+    * the low bits of src1 matching the size of src0 (e.g., 4-bits for W or UW
+    * src0) are used. The Bspec (backed by data from experimentation) state
+    * that 0x3f is used for Q and UQ types, and 0x1f is used for **all** other
+    * types.
     *
-    * Gen4 - Gen7: After application of source modifiers, the low 5-bits of
-    * src1 are used an unsigned value for the shift count.
-    *
-    * Gen8: As with earlier platforms, but for Q and UQ types on src0, the low
-    * 6-bit of src1 are used.
-    *
-    * Gen9+: The low bits of src1 matching the size of src0 (e.g., 4-bits for
-    * W or UW src0).
-    *
-    * The implication is that the following instruction will produce a
-    * different result on Gen9+ than on previous platforms:
-    *
-    *    shr(8)    g4<1>UW    g12<8,8,1>UW    0x0010UW
-    *
-    * where Gen9+ will shift by zero, and earlier platforms will shift by 16.
-    *
-    * This does not seem to be the case.  Experimentally, it has been
-    * determined that shifts of 16-bit values on Gen8 behave properly.  Shifts
-    * of 8-bit values on both Gen8 and Gen9 do not.  Gen11+ lowers 8-bit
-    * values, so those platforms were not tested.  No features expose access
-    * to 8- or 16-bit types on Gen7 or earlier, so those platforms were not
-    * tested either.  See
-    * https://gitlab.freedesktop.org/mesa/crucible/-/merge_requests/76.
-    *
-    * This is part of the reason 8-bit values are lowered to 16-bit on all
-    * platforms.
+    * The match the behavior expected for the NIR opcodes, explicit masks for
+    * 8- and 16-bit types must be added.
     */
    case nir_op_ishl:
-      bld.SHL(result, op[0], op[1]);
+      if (nir_dest_bit_size(instr->dest.dest) < 32) {
+         bld.AND(result, op[1], brw_imm_ud(nir_dest_bit_size(instr->dest.dest) - 1));
+         bld.SHL(result, op[0], result);
+      } else
+         bld.SHL(result, op[0], op[1]);
+
       break;
    case nir_op_ishr:
-      bld.ASR(result, op[0], op[1]);
+      if (nir_dest_bit_size(instr->dest.dest) < 32) {
+         bld.AND(result, op[1], brw_imm_ud(nir_dest_bit_size(instr->dest.dest) - 1));
+         bld.ASR(result, op[0], result);
+      } else
+         bld.ASR(result, op[0], op[1]);
       break;
    case nir_op_ushr:
-      bld.SHR(result, op[0], op[1]);
+      if (nir_dest_bit_size(instr->dest.dest) < 32) {
+         bld.AND(result, op[1], brw_imm_ud(nir_dest_bit_size(instr->dest.dest) - 1));
+         bld.SHR(result, op[0], result);
+      } else
+         bld.SHR(result, op[0], op[1]);
       break;
 
    case nir_op_urol:
author	Ian Romanick <ian.d.romanick@intel.com>	2023-05-11 15:22:44 -0700
committer	Ian Romanick <ian.d.romanick@intel.com>	2023-05-11 15:36:27 -0700
commit	a6b269b3d83eb7c827cb258c0171f0d36b9a6567 (patch)
tree	ecad31e26f00bb4b298f1729f33f71ad6aae50a5
parent	90c3fd0c835241b73c7d17b7f1efd110fbdf6231 (diff)