summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Justen <jordan.l.justen@intel.com>2024-03-29 02:03:40 -0700
committerJordan Justen <jordan.l.justen@intel.com>2024-03-29 02:42:52 -0700
commit8ff18d5604d6c1b46e24e89ba4f82142458ffe9e (patch)
tree3c22a2ce93e6d19c8554b862641c30fe0f8ff859
parent9d43c6fe7a16765de34d559e12cd20eec9b215b7 (diff)
intel/brw/validate: Convert access mask to be grf basedxe2-update-reg-access-validation
Our validation code doesn't need to know which bytes are accessed. It only needs to know which grfs were accessed by an element. This also helps to easily handle the Xe2 register size change. Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
-rw-r--r--src/intel/compiler/brw_eu_validate.c89
1 files changed, 55 insertions, 34 deletions
diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index cdf4ce5ffae..db857b0a2c5 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -1318,37 +1318,63 @@ special_restrictions_for_mixed_float_mode(const struct brw_isa_info *isa,
}
/**
- * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
+ * Creates a \p grf_access_mask for an \p exec_size, \p element_size, and a
+ * region
*
- * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is
- * a bitmask of bytes accessed by the region.
+ * A \p grf_access_mask is a 32-element array of uint8_t, where each uint8_t
+ * is a bitmask of grfs accessed by the region.
*
* For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
* instruction would be
*
- * access_mask[0] = 0x00000000000000F0
- * access_mask[1] = 0x000000000000F000
- * access_mask[2] = 0x0000000000F00000
- * access_mask[3] = 0x00000000F0000000
+ * access_mask[0] = 0x01 (bytes 7-4 of the 1st grf)
+ * access_mask[1] = 0x01 (bytes 15-12 of the 1st grf)
+ * access_mask[2] = 0x01 (bytes 23-20 of the 1st grf)
+ * access_mask[3] = 0x01 (bytes 31-28 of the 1st grf)
+ * access_mask[4-31] = 0
+ *
+ * Before Xe2, gX<1,1,0>F in an exec_size == 16 would yield:
+ *
+ * access_mask[0] = 0x01 (bytes 3-0 of the 1st grf)
+ * access_mask[1] = 0x01 (bytes 7-4 of the 1st grf)
+ * ...
+ * access_mask[7] = 0x01 (bytes 31-28 of the 1st grf)
+ * access_mask[8] = 0x02 (bytes 3-0 of the 2nd grf)
+ * ...
+ * access_mask[15] = 0x02 (bytes 31-28 of the 2nd grf)
+ * access_mask[16-31] = 0
+ *
+ * Whereas on Xe2, gX<1,1,0>F in an exec_size of 16 would yield:
+ *
+ * access_mask[0] = 0x01 (bytes 3-0 of the 1st grf)
+ * access_mask[1] = 0x01 (bytes 7-4 of the 1st grf)
+ * ...
+ * access_mask[7] = 0x01 (bytes 31-28 of the 1st grf)
+ * access_mask[8] = 0x01 (bytes 35-32 of the 1st grf)
+ * ...
+ * access_mask[15] = 0x01 (bytes 63-60 of the 1st grf)
* access_mask[4-31] = 0
*
- * because the first execution channel accesses bytes 7-4 and the second
- * execution channel accesses bytes 15-12, etc.
*/
static void
-align1_access_mask(uint64_t access_mask[static 32],
- unsigned exec_size, unsigned element_size, unsigned subreg,
- unsigned vstride, unsigned width, unsigned hstride)
+grfs_accessed(const struct intel_device_info *devinfo,
+ uint8_t grf_access_mask[static 32],
+ unsigned exec_size, unsigned element_size, unsigned subreg,
+ unsigned vstride, unsigned width, unsigned hstride)
{
- const uint64_t mask = (1ULL << element_size) - 1;
unsigned rowbase = subreg;
unsigned element = 0;
+ assert(util_is_power_of_two_nonzero(reg_unit(devinfo)));
+ unsigned grf_size_shift = (5 - 1) + ffs(reg_unit(devinfo));
for (int y = 0; y < exec_size / width; y++) {
unsigned offset = rowbase;
for (int x = 0; x < width; x++) {
- access_mask[element++] = mask << (offset % 64);
+ const unsigned start_grf = (offset >> grf_size_shift) % 8;
+ const unsigned end_byte = offset + (element_size - 1);
+ const unsigned end_grf = (end_byte >> grf_size_shift) % 8;
+ grf_access_mask[element++] = (1 << start_grf) | (1 << end_grf);
offset += hstride * element_size;
}
@@ -1362,19 +1388,14 @@ align1_access_mask(uint64_t access_mask[static 32],
* Returns the number of registers accessed according to the \p access_mask
*/
static int
-registers_read(const uint64_t access_mask[static 32])
+registers_read(const uint8_t grfs_accessed[static 32])
{
- int regs_read = 0;
+ uint8_t all_read = 0;
- for (unsigned i = 0; i < 32; i++) {
- if (access_mask[i] > 0xFFFFFFFF) {
- return 2;
- } else if (access_mask[i]) {
- regs_read = 1;
- }
- }
+ for (unsigned i = 0; i < 32; i++)
+ all_read |= grfs_accessed[i];
- return regs_read;
+ return util_bitcount(all_read);
}
/**
@@ -1390,7 +1411,7 @@ region_alignment_rules(const struct brw_isa_info *isa,
brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
unsigned num_sources = brw_num_sources_from_inst(isa, inst);
unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
- uint64_t dst_access_mask[32], unused_access_mask[32];
+ uint8_t dst_access_mask[32], unused_access_mask[32];
struct string error_msg = { .str = NULL, .len = 0 };
if (num_sources == 3)
@@ -1427,9 +1448,9 @@ region_alignment_rules(const struct brw_isa_info *isa,
type = brw_inst_src ## n ## _type(devinfo, inst); \
element_size = brw_reg_type_to_size(type); \
subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
- align1_access_mask(unused_access_mask, \
- exec_size, element_size, subreg, \
- vstride, width, hstride)
+ grfs_accessed(devinfo, unused_access_mask, \
+ exec_size, element_size, subreg, \
+ vstride, width, hstride)
if (i == 0) {
DO_SRC(0);
@@ -1462,10 +1483,10 @@ region_alignment_rules(const struct brw_isa_info *isa,
if (error_msg.str)
return error_msg;
- align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
- exec_size == 1 ? 0 : exec_size * stride,
- exec_size == 1 ? 1 : exec_size,
- exec_size == 1 ? 0 : stride);
+ grfs_accessed(devinfo, dst_access_mask, exec_size, element_size, subreg,
+ exec_size == 1 ? 0 : exec_size * stride,
+ exec_size == 1 ? 1 : exec_size,
+ exec_size == 1 ? 0 : stride);
unsigned dst_regs = registers_read(dst_access_mask);
@@ -1482,10 +1503,10 @@ region_alignment_rules(const struct brw_isa_info *isa,
unsigned upper_reg_writes = 0, lower_reg_writes = 0;
for (unsigned i = 0; i < exec_size; i++) {
- if (dst_access_mask[i] > 0xFFFFFFFF) {
+ if (dst_access_mask[i] == 2) {
upper_reg_writes++;
} else {
- assert(dst_access_mask[i] != 0);
+ assert(dst_access_mask[i] == 1);
lower_reg_writes++;
}
}