summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2024-03-28 09:54:20 -0700
committerIan Romanick <ian.d.romanick@intel.com>2024-04-04 21:04:09 -0700
commit0e817ba548c56014c3707633c87b0ff50cf6d3ad (patch)
tree1d1f26f57bdf61210aa896d6d07dff7e11ff2d7d
parent50c7d25a9e8967cbd5a434d53ddddfbec9d30c93 (diff)
intel/brw/xe2+: Implement Wa 22016140776
HF sources to math instructions cannot be scalar. This is very similar to an old Gfx6 restriction on POW, so let's fix it in a similar way. As an extra bit of saftey, lower any occurances that might slip through in brw_fs_lower_regioning. The primary change is to prevent copy propagation from violating the restriction. With that change, nothing should be able to generate these invalid source strides. The modification to fs_visitor::validate should detect potential problems sooner rather than later. Previous attempts to implement this Wa when emitting the math instruction (in brw_eu_emit.c gfx6_math) didn't work for several reasons. The lowering happens after the SWSB pass, so the scoreboarding was incorrect (thanks to Curro for finding that). In addition, the lowering happens after register allocation, so it's impossible to allocate a non-scalar register to expand the scalar value. Fixes 113 tests in the dEQP-VK.spirv_assembly.* group on LNL. v2: Add changes to brw_fs_lower_regioning. Suggested by Curro. Reviewed-by: Francisco Jerez <currojerez@riseup.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28480>
-rw-r--r--src/intel/compiler/brw_eu_validate.c22
-rw-r--r--src/intel/compiler/brw_fs_copy_propagation.cpp30
-rw-r--r--src/intel/compiler/brw_fs_lower_regioning.cpp11
-rw-r--r--src/intel/compiler/brw_fs_validate.cpp18
4 files changed, 73 insertions, 8 deletions
diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index 03a55c83a5b..3194ba073c1 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -769,6 +769,28 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
if (desc->ndst == 0)
return error_msg;
+ if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH &&
+ intel_needs_workaround(devinfo, 22016140776)) {
+ /* Wa_22016140776:
+ *
+ * Scalar broadcast on HF math (packed or unpacked) must not be
+ * used. Compiler must use a mov instruction to expand the scalar
+ * value to a vector before using in a HF (packed or unpacked)
+ * math operation.
+ */
+ ERROR_IF(brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_HF &&
+ src0_has_scalar_region(devinfo, inst),
+ "Scalar broadcast on HF math (packed or unpacked) must not "
+ "be used.");
+
+ if (num_sources > 1) {
+ ERROR_IF(brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_HF &&
+ src1_has_scalar_region(devinfo, inst),
+ "Scalar broadcast on HF math (packed or unpacked) must not "
+ "be used.");
+ }
+ }
+
/* The PRMs say:
*
* Where n is the largest element size in bytes for any source or
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index 64d00ce12e4..674903944f1 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -613,15 +613,29 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type,
return stride == 1 || stride == 0;
}
- /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions",
- * page 391 ("Extended Math Function"):
- *
- * The following restrictions apply for align1 mode: Scalar source is
- * supported. Source and destination horizontal stride must be the
- * same.
- */
- if (inst->is_math())
+ if (inst->is_math()) {
+ /* Wa_22016140776:
+ *
+ * Scalar broadcast on HF math (packed or unpacked) must not be used.
+ * Compiler must use a mov instruction to expand the scalar value to
+ * a vector before using in a HF (packed or unpacked) math operation.
+ *
+ * Prevent copy propagating a scalar value into a math instruction.
+ */
+ if (intel_needs_workaround(devinfo, 22016140776) &&
+ stride == 0 && inst->src[arg].type == BRW_REGISTER_TYPE_HF) {
+ return false;
+ }
+
+ /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions",
+ * page 391 ("Extended Math Function"):
+ *
+ * The following restrictions apply for align1 mode: Scalar source
+ * is supported. Source and destination horizontal stride must be
+ * the same.
+ */
return stride == inst->dst.stride || stride == 0;
+ }
return true;
}
diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp
index 0f7e4810664..16a49450bd9 100644
--- a/src/intel/compiler/brw_fs_lower_regioning.cpp
+++ b/src/intel/compiler/brw_fs_lower_regioning.cpp
@@ -238,6 +238,17 @@ namespace {
has_invalid_src_region(const intel_device_info *devinfo, const fs_inst *inst,
unsigned i)
{
+ /* Wa_22016140776:
+ *
+ * Scalar broadcast on HF math (packed or unpacked) must not be used.
+ * Compiler must use a mov instruction to expand the scalar value to
+ * a vector before using in a HF (packed or unpacked) math operation.
+ */
+ if (intel_needs_workaround(devinfo, 22016140776) &&
+ is_uniform(inst->src[i]) && inst->src[i].type == BRW_REGISTER_TYPE_HF) {
+ return true;
+ }
+
if (is_send(inst) || inst->is_math() || inst->is_control_source(i) ||
inst->opcode == BRW_OPCODE_DPAS) {
return false;
diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp
index 7ef2be70146..f1df381b2e3 100644
--- a/src/intel/compiler/brw_fs_validate.cpp
+++ b/src/intel/compiler/brw_fs_validate.cpp
@@ -194,6 +194,24 @@ fs_visitor::validate()
phys_subnr(devinfo, inst->dst.as_brw_reg()) == 0) {
fsv_assert_eq(inst->dst.hstride, 1);
}
+
+ if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) {
+ /* Wa_22016140776:
+ *
+ * Scalar broadcast on HF math (packed or unpacked) must not be
+ * used. Compiler must use a mov instruction to expand the scalar
+ * value to a vector before using in a HF (packed or unpacked)
+ * math operation.
+ *
+ * Since copy propagation knows about this restriction, nothing
+ * should be able to generate these invalid source strides. Detect
+ * potential problems sooner rather than later.
+ */
+ for (unsigned i = 0; i < inst->sources; i++) {
+ fsv_assert(!is_uniform(inst->src[i]) ||
+ inst->src[i].type != BRW_REGISTER_TYPE_HF);
+ }
+ }
}
}
#endif