diff options
author | Ian Romanick <ian.d.romanick@intel.com> | 2023-05-11 15:22:44 -0700 |
---|---|---|
committer | Ian Romanick <ian.d.romanick@intel.com> | 2023-05-11 15:36:27 -0700 |
commit | a6b269b3d83eb7c827cb258c0171f0d36b9a6567 (patch) | |
tree | ecad31e26f00bb4b298f1729f33f71ad6aae50a5 | |
parent | 90c3fd0c835241b73c7d17b7f1efd110fbdf6231 (diff) |
WIP: intel/fs: Fix shift counts for 8- and 16-bit typeswip/fix-shift-counts
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 13 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 54 |
2 files changed, 37 insertions, 30 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 59637d36fb2..bf9faee560b 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2655,6 +2655,19 @@ fs_visitor::opt_algebraic() break; } break; + case BRW_OPCODE_AND: + if (inst->src[1].file != IMM) + continue; + + if (inst->src[0].file == IMM) { + inst->opcode = BRW_OPCODE_MOV; + inst->sources = 1; + inst->src[0].ud &= inst->src[1].ud; + inst->src[1] = reg_undef; + progress = true; + break; + } + break; case BRW_OPCODE_OR: if (inst->src[0].equals(inst->src[1]) || inst->src[1].is_zero()) { diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index a6a262379e3..8f45586a24d 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1724,43 +1724,37 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, case nir_op_bitfield_insert: unreachable("not reached: should have been lowered"); - /* For all shift operations: + /* With regards to implicit masking of the shift counts for 8- and 16-bit + * types, the PRMs are **incorrect**. They falsely state that on Gen9+ only + * the low bits of src1 matching the size of src0 (e.g., 4-bits for W or UW + * src0) are used. The Bspec (backed by data from experimentation) state + * that 0x3f is used for Q and UQ types, and 0x1f is used for **all** other + * types. * - * Gen4 - Gen7: After application of source modifiers, the low 5-bits of - * src1 are used an unsigned value for the shift count. - * - * Gen8: As with earlier platforms, but for Q and UQ types on src0, the low - * 6-bit of src1 are used. - * - * Gen9+: The low bits of src1 matching the size of src0 (e.g., 4-bits for - * W or UW src0). - * - * The implication is that the following instruction will produce a - * different result on Gen9+ than on previous platforms: - * - * shr(8) g4<1>UW g12<8,8,1>UW 0x0010UW - * - * where Gen9+ will shift by zero, and earlier platforms will shift by 16. - * - * This does not seem to be the case. Experimentally, it has been - * determined that shifts of 16-bit values on Gen8 behave properly. Shifts - * of 8-bit values on both Gen8 and Gen9 do not. Gen11+ lowers 8-bit - * values, so those platforms were not tested. No features expose access - * to 8- or 16-bit types on Gen7 or earlier, so those platforms were not - * tested either. See - * https://gitlab.freedesktop.org/mesa/crucible/-/merge_requests/76. - * - * This is part of the reason 8-bit values are lowered to 16-bit on all - * platforms. + * The match the behavior expected for the NIR opcodes, explicit masks for + * 8- and 16-bit types must be added. */ case nir_op_ishl: - bld.SHL(result, op[0], op[1]); + if (nir_dest_bit_size(instr->dest.dest) < 32) { + bld.AND(result, op[1], brw_imm_ud(nir_dest_bit_size(instr->dest.dest) - 1)); + bld.SHL(result, op[0], result); + } else + bld.SHL(result, op[0], op[1]); + break; case nir_op_ishr: - bld.ASR(result, op[0], op[1]); + if (nir_dest_bit_size(instr->dest.dest) < 32) { + bld.AND(result, op[1], brw_imm_ud(nir_dest_bit_size(instr->dest.dest) - 1)); + bld.ASR(result, op[0], result); + } else + bld.ASR(result, op[0], op[1]); break; case nir_op_ushr: - bld.SHR(result, op[0], op[1]); + if (nir_dest_bit_size(instr->dest.dest) < 32) { + bld.AND(result, op[1], brw_imm_ud(nir_dest_bit_size(instr->dest.dest) - 1)); + bld.SHR(result, op[0], result); + } else + bld.SHR(result, op[0], op[1]); break; case nir_op_urol: |