diff options
author | Matt Turner <mattst88@gmail.com> | 2016-03-13 11:32:44 -0700 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2016-05-03 23:02:21 -0700 |
commit | bb6decdf863647270c921b6bb8da1457d5d3d1f2 (patch) | |
tree | 1c67c7949650397159bb1bc535cd05070a765d97 | |
parent | 69b6693a5e181191877f29ea1d2cd8dc512fb4d2 (diff) |
i965/fs/skl+: Prepare LOD-zero optimization for sample_lz.
The next patch will add support for recognizing sample_lz, whose LOD
argument is in a different location in the payload. This patch
generalizes the existing function, opt_ld_lz, to handle that (and renames
it to opt_sample_lz, because sample_lz is a much more important message to
recognize).
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 58 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 2 |
2 files changed, 37 insertions, 23 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 15df2984a4e4..dc2af660455d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2483,30 +2483,35 @@ fs_visitor::opt_zero_samples()
    return progress;
 }

-static bool
-lod_source_is_zero(const fs_inst *send_inst)
+static fs_inst *
+lod_source_is_zero(const fs_inst *send_inst, int lod_offset)
 {
    int reg_offset = send_inst->exec_size / 8 * 2 + send_inst->header_size;
    const fs_reg src = byte_offset(send_inst->src[0], reg_offset * 32);

    /* Look for the last instruction that writes to the source */
-   foreach_inst_in_block_reverse_starting_from(const fs_inst, inst, send_inst) {
-      if (inst->overwrites_reg(src)) {
-         return (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
-                 inst->src[inst->header_size + 2].is_zero());
+   foreach_inst_in_block_reverse_starting_from(fs_inst, inst, send_inst) {
+      if (inst->overwrites_reg(src) &&
+          inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
+          inst->src[inst->header_size + lod_offset].is_zero()) {
+         return inst;
       }
    }

-   return false;
+   return NULL;
 }

 /**
- * Replace LD sample messages that have a zero LOD with LD_LZ. This
- * instruction is available since Gen9. It would help for doing texelFetch
- * when passing three coordinates because then the LOD can be skipped.
+ * Replace sample messages that have a zero LOD with the special _LZ messages.
+ * These instructions are available since Gen9. The available _LZ messages
+ * are:
+ *
+ * - ld_lz
+ * - sample_lz
+ * - sample_c_lz
  */
 bool
-fs_visitor::opt_ld_lz()
+fs_visitor::opt_sample_lz()
 {
    if (devinfo->gen < 9)
       return false;
@@ -2520,20 +2525,27 @@ fs_visitor::opt_ld_lz()
       /* If the LOD parameter is not sent or is a constant zero then we can
        * change the instruction.
        */
+      int lod_offset;
+      if (inst->opcode == SHADER_OPCODE_TXF) {
+         lod_offset = 2;
+      } else {
+         unreachable("not reached");
+      }
       bool lod_included = (inst->mlen - inst->header_size >=
-                           inst->exec_size / 8 * 3);
-      if (lod_included && !lod_source_is_zero(inst))
-         continue;
+                           inst->exec_size / 8 * (lod_offset + 1));

-      inst->opcode = SHADER_OPCODE_TXF_LZ;
+      if (lod_included && inst->mlen - inst->header_size > inst->exec_size / 8) {
+         fs_inst *load_payload = lod_source_is_zero(inst, lod_offset);
+         if (!load_payload)
+            continue;

-      if (lod_included) {
          inst->mlen -= inst->exec_size / 8;

-         /* If the r coordinate is included then we need a new LOAD_PAYLOAD
-          * instruction which has it in the right place.
+         /* If the lod is included then we need a new LOAD_PAYLOAD instruction
+          * which has latter arguments in the right places.
           */
-         if (inst->mlen - inst->header_size >= inst->exec_size / 8 * 3) {
+         if (inst->mlen - inst->header_size >=
+             inst->exec_size / 8 * (lod_offset + 1)) {
             const fs_builder ibld(this, block, inst);
             fs_reg send_header = fs_reg(VGRF, alloc.allocate(inst->mlen),
                                         BRW_REGISTER_TYPE_F);
@@ -2544,11 +2556,11 @@ fs_visitor::opt_ld_lz()

             for (int i = 0; i < n_sources; i++) {
                int j;
-               if (i >= inst->header_size + 2)
+               if (i >= inst->header_size + lod_offset)
                   j = i + 1;
                else
                   j = i;
-               new_sources[i] = offset(inst->src[0], ibld, j);
+               new_sources[i] = load_payload->src[j];
             }

             /* The LOAD_PAYLOAD helper is not used for the same reasons given
@@ -2568,6 +2580,8 @@ fs_visitor::opt_ld_lz()
          }
       }

+      inst->opcode = SHADER_OPCODE_TXF_LZ;
+
       progress = true;
    }

@@ -5425,7 +5439,7 @@ fs_visitor::optimize()
       OPT(opt_redundant_discard_jumps);
       OPT(opt_saturate_propagation);
       OPT(opt_zero_samples);
-      OPT(opt_ld_lz);
+      OPT(opt_sample_lz);
       OPT(register_coalesce);
       OPT(compute_to_mrf);
       OPT(eliminate_find_live_channel);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 66b39dc1a5c0..9c017097e588 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -227,7 +227,7 @@ public:
    bool opt_saturate_propagation();
    bool opt_cmod_propagation();
    bool opt_zero_samples();
-   bool opt_ld_lz();
+   bool opt_sample_lz();
    void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
                      uint32_t spill_offset, int count);
    void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,