summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Connor Abbott <connor.w.abbott@intel.com> 2015-08-03 17:44:08 -0700
committer: Samuel Iglesias Gonsálvez <siglesias@igalia.com> 2016-04-29 09:47:01 +0200
commit: 6d4b8e5412d916b5c82ba7c56a6462768406cb1f (patch)
tree: 0f6c2cd8b33dae3fdf317374753397959f469426
parent: cc1839abdb3544231732f3d052ab393f624184c7 (diff)
i965/fs: extend exec_size halving in the generator
The HW has a restriction that only vertical stride may cross register boundaries. Previously, this only mattered for SIMD16 instructions where we needed to use the same regioning parameters as the equivalent SIMD8 instruction but double the exec size. But we need to do the same splitting for 64-bit instructions as well as instructions with a stride of 2 (which effectively consume 64 bits per element). Fix up the code to do the right thing instead of special-casing SIMD16.
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 16
1 file changed, 10 insertions, 6 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 8654ca4d0a..0743e746ca 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -65,8 +65,9 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
case VGRF:
if (reg->stride == 0) {
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
- } else if (inst->exec_size < 8) {
- brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->nr, 0);
+ } else if (inst->exec_size * reg->stride * type_sz(reg->type) <= 32) {
+ brw_reg = brw_vecn_reg(inst->exec_size, brw_file_from_reg(reg),
+ reg->nr, 0);
brw_reg = stride(brw_reg, inst->exec_size * reg->stride,
inst->exec_size, reg->stride);
} else {
@@ -76,11 +77,14 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
* rule implies that elements within a 'Width' cannot cross GRF
* boundaries.
*
- * So, for registers with width > 8, we have to use a width of 8
- * and trust the compression state to sort out the exec size.
+ * So, for registers that are large enough, we have to split the exec
+ * size in two and trust the compression state to sort it out.
*/
- brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->nr, 0);
- brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride);
+ assert(inst->exec_size / 2 * reg->stride * type_sz(reg->type) <= 32);
+ brw_reg = brw_vecn_reg(inst->exec_size / 2, brw_file_from_reg(reg),
+ reg->nr, 0);
+ brw_reg = stride(brw_reg, inst->exec_size / 2 * reg->stride,
+ inst->exec_size / 2, reg->stride);
}
brw_reg = retype(brw_reg, reg->type);