summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/intel/compiler/brw_eu_validate.c24
1 files changed, 24 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index 64615af44a..f231ea038b 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -467,6 +467,14 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
brw_hw_reg_type_to_size(devinfo, exec_type, BRW_GENERAL_REGISTER_FILE);
unsigned dst_type_size = brw_element_size(devinfo, inst, dst);
+ /* On IVB/BYT, region parameters and execution size for DF are in terms of
+ * 32-bit elements, so they are doubled. For evaluating the validity of an
+ * instruction, we halve them.
+ */
+ if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ exec_type_size == 8 && dst_type_size == 4)
+ dst_type_size = 8;
+
if (exec_type_size > dst_type_size) {
ERROR_IF(dst_stride * dst_type_size != exec_type_size,
"Destination stride must be equal to the ratio of the sizes of "
@@ -576,6 +584,14 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
}
#undef DO_SRC
+ /* On IVB/BYT, region parameters and execution size for DF are in terms of
+ * 32-bit elements, so they are doubled. For evaluating the validity of an
+ * instruction, we halve them.
+ */
+ if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ element_size == 8)
+ element_size = 4;
+
/* ExecSize must be greater than or equal to Width. */
ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
"to Width");
@@ -792,6 +808,14 @@ region_alignment_rules(const struct gen_device_info *devinfo,
if (error_msg.str)
return error_msg;
+ /* On IVB/BYT, region parameters and execution size for DF are in terms of
+ * 32-bit elements, so they are doubled. For evaluating the validity of an
+ * instruction, we halve them.
+ */
+ if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ element_size == 8)
+ element_size = 4;
+
align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
exec_size == 1 ? 0 : exec_size * stride,
exec_size == 1 ? 1 : exec_size,