diff options
author | Ian Romanick <ian.d.romanick@intel.com> | 2024-03-28 09:54:20 -0700 |
---|---|---|
committer | Ian Romanick <ian.d.romanick@intel.com> | 2024-04-04 21:04:09 -0700 |
commit | 0e817ba548c56014c3707633c87b0ff50cf6d3ad (patch) | |
tree | 1d1f26f57bdf61210aa896d6d07dff7e11ff2d7d | |
parent | 50c7d25a9e8967cbd5a434d53ddddfbec9d30c93 (diff) |
intel/brw/xe2+: Implement Wa 22016140776
HF sources to math instructions cannot be scalar. This is very similar
to an old Gfx6 restriction on POW, so let's fix it in a similar way.
As an extra bit of saftey, lower any occurances that might slip through
in brw_fs_lower_regioning.
The primary change is to prevent copy propagation from violating the
restriction. With that change, nothing should be able to generate these
invalid source strides. The modification to fs_visitor::validate should
detect potential problems sooner rather than later.
Previous attempts to implement this Wa when emitting the math
instruction (in brw_eu_emit.c gfx6_math) didn't work for several
reasons. The lowering happens after the SWSB pass, so the scoreboarding
was incorrect (thanks to Curro for finding that). In addition, the
lowering happens after register allocation, so it's impossible to
allocate a non-scalar register to expand the scalar value.
Fixes 113 tests in the dEQP-VK.spirv_assembly.* group on LNL.
v2: Add changes to brw_fs_lower_regioning. Suggested by Curro.
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28480>
-rw-r--r-- | src/intel/compiler/brw_eu_validate.c | 22 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_copy_propagation.cpp | 30 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_lower_regioning.cpp | 11 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_validate.cpp | 18 |
4 files changed, 73 insertions, 8 deletions
diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index 03a55c83a5b..3194ba073c1 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -769,6 +769,28 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa, if (desc->ndst == 0) return error_msg; + if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH && + intel_needs_workaround(devinfo, 22016140776)) { + /* Wa_22016140776: + * + * Scalar broadcast on HF math (packed or unpacked) must not be + * used. Compiler must use a mov instruction to expand the scalar + * value to a vector before using in a HF (packed or unpacked) + * math operation. + */ + ERROR_IF(brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_HF && + src0_has_scalar_region(devinfo, inst), + "Scalar broadcast on HF math (packed or unpacked) must not " + "be used."); + + if (num_sources > 1) { + ERROR_IF(brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_HF && + src1_has_scalar_region(devinfo, inst), + "Scalar broadcast on HF math (packed or unpacked) must not " + "be used."); + } + } + /* The PRMs say: * * Where n is the largest element size in bytes for any source or diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 64d00ce12e4..674903944f1 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -613,15 +613,29 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type, return stride == 1 || stride == 0; } - /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions", - * page 391 ("Extended Math Function"): - * - * The following restrictions apply for align1 mode: Scalar source is - * supported. Source and destination horizontal stride must be the - * same. - */ - if (inst->is_math()) + if (inst->is_math()) { + /* Wa_22016140776: + * + * Scalar broadcast on HF math (packed or unpacked) must not be used. + * Compiler must use a mov instruction to expand the scalar value to + * a vector before using in a HF (packed or unpacked) math operation. + * + * Prevent copy propagating a scalar value into a math instruction. + */ + if (intel_needs_workaround(devinfo, 22016140776) && + stride == 0 && inst->src[arg].type == BRW_REGISTER_TYPE_HF) { + return false; + } + + /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions", + * page 391 ("Extended Math Function"): + * + * The following restrictions apply for align1 mode: Scalar source + * is supported. Source and destination horizontal stride must be + * the same. + */ return stride == inst->dst.stride || stride == 0; + } return true; } diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index 0f7e4810664..16a49450bd9 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -238,6 +238,17 @@ namespace { has_invalid_src_region(const intel_device_info *devinfo, const fs_inst *inst, unsigned i) { + /* Wa_22016140776: + * + * Scalar broadcast on HF math (packed or unpacked) must not be used. + * Compiler must use a mov instruction to expand the scalar value to + * a vector before using in a HF (packed or unpacked) math operation. + */ + if (intel_needs_workaround(devinfo, 22016140776) && + is_uniform(inst->src[i]) && inst->src[i].type == BRW_REGISTER_TYPE_HF) { + return true; + } + if (is_send(inst) || inst->is_math() || inst->is_control_source(i) || inst->opcode == BRW_OPCODE_DPAS) { return false; diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp index 7ef2be70146..f1df381b2e3 100644 --- a/src/intel/compiler/brw_fs_validate.cpp +++ b/src/intel/compiler/brw_fs_validate.cpp @@ -194,6 +194,24 @@ fs_visitor::validate() phys_subnr(devinfo, inst->dst.as_brw_reg()) == 0) { fsv_assert_eq(inst->dst.hstride, 1); } + + if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) { + /* Wa_22016140776: + * + * Scalar broadcast on HF math (packed or unpacked) must not be + * used. Compiler must use a mov instruction to expand the scalar + * value to a vector before using in a HF (packed or unpacked) + * math operation. + * + * Since copy propagation knows about this restriction, nothing + * should be able to generate these invalid source strides. Detect + * potential problems sooner rather than later. + */ + for (unsigned i = 0; i < inst->sources; i++) { + fsv_assert(!is_uniform(inst->src[i]) || + inst->src[i].type != BRW_REGISTER_TYPE_HF); + } + } } } #endif |