From ebfb703d443a4b22320c3f1eed34e6e1aa54e998 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 9 Feb 2017 10:16:58 -0800 Subject: i965/fs: Get 64-bit indirect moves working on IVB. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Samuel Iglesias Gonsálvez --- src/intel/compiler/brw_fs_generator.cpp | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 16083a70d7..a7f95cc76b 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -442,7 +442,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst, brw_MOV(p, dst, reg); } else { /* Prior to Broadwell, there are only 8 address registers. */ - assert(inst->exec_size == 8 || devinfo->gen >= 8); + assert(inst->exec_size <= 8 || devinfo->gen >= 8); /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ struct brw_reg addr = vec8(brw_address_reg(0)); @@ -480,7 +480,30 @@ fs_generator::generate_mov_indirect(fs_inst *inst, * code, using it saves us 0 instructions and would require quite a bit * of case-by-case work. It's just not worth it. */ - brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset)); + if (devinfo->gen >= 8 || devinfo->is_haswell || type_sz(reg.type) < 8) { + brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset)); + } else { + /* IVB reads two address register components per channel for + * indirectly addressed 64-bit sources, so we need to initialize + * adjacent address components to consecutive dwords of the source + * region by emitting two separate ADD instructions. Found + * empirically. + */ + assert(inst->exec_size <= 4); + brw_push_insn_state(p); + brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); + + brw_ADD(p, spread(addr, 2), indirect_byte_offset, + brw_imm_uw(imm_byte_offset)); + brw_inst_set_no_dd_clear(devinfo, brw_last_inst, true); + + brw_ADD(p, spread(suboffset(addr, 1), 2), indirect_byte_offset, + brw_imm_uw(imm_byte_offset + 4)); + brw_inst_set_no_dd_check(devinfo, brw_last_inst, true); + + brw_pop_insn_state(p); + } + struct brw_reg ind_src = brw_VxH_indirect(0, 0); brw_inst *mov = brw_MOV(p, dst, retype(ind_src, reg.type)); -- cgit v1.2.3