From 9ef57d20ff3ac02558f8f4471ea6ce14f6f7028a Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Sun, 7 May 2017 16:18:23 -0400
Subject: i965/shader_time: add accounting for conditional discard

---
Review notes (free text between the "---" marker and the first "diff --git"
line; it is commentary, not part of the change itself):

 * brw_fs.h gains a declaration for a new fs_visitor member,
   emit_shader_time_branch(fs_reg cond).
 * brw_fs_nir.cpp moves the branch-accounting sequence out of nir_emit_if()
   into that member. The added body matches the removed lines statement for
   statement; nir_emit_if() now calls the helper when shader_time_index >= 0.
 * The nir_intrinsic_discard_if path gains the same call behind the same
   shader_time_index >= 0 check. It also fetches the condition once into a
   local and reuses it for the CMP instead of calling get_nir_src() inline.
 * What the helper emits, in order: MOV of 0xffff into brw_flag_reg(0, 0)
   (and into brw_flag_reg(0, 1) when dispatch_width == 32); a MOV of cond to
   the null register with conditional mod Z; an ADD of 1 to component 1 of
   `branches` predicated with ALL8H/ALL16H/ALL32H chosen by dispatch_width;
   the same MOV/ADD pair again with conditional mod NZ; and finally an
   unpredicated ADD of 1 to component 0 of `branches`.
 * The "all true" comment is missing its closing parenthesis after "taken".
   That is carried over unchanged from the removed code.
 * NOTE(review): this copy of the patch had lost its line breaks. The line
   layout, the blank context lines and the indentation below were restored
   from the hunk line counts and a 3-space indent; confirm them against the
   tree before applying.

 src/intel/compiler/brw_fs.h       |  1 +
 src/intel/compiler/brw_fs_nir.cpp | 75 ++++++++++++++++++++++-----------------
 2 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 21a72dd546..a69feebcc3 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -263,6 +263,7 @@ public:
 
    void emit_shader_time_begin();
    void emit_shader_time_end();
+   void emit_shader_time_branch(fs_reg cond);
    void SHADER_TIME_ADD(const brw::fs_builder &bld,
                         int shader_time_subindex,
                         fs_reg value);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 11011f448a..d99c0ad899 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -289,44 +289,50 @@ fs_visitor::nir_emit_cf_list(exec_list *list)
    }
 }
 
+void
+fs_visitor::emit_shader_time_branch(fs_reg cond)
+{
+   const fs_builder cbld = bld.exec_all().group(1, 0);
+   /* force non-executing channels to true in f0 so we ignore them */
+   cbld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+   if (dispatch_width == 32)
+      cbld.MOV(brw_flag_reg(0, 1), brw_imm_uw(0xffff));
+
+   /* move !condition into f0 */
+   fs_inst *inst = bld.MOV(bld.null_reg_ud(), cond);
+   inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+   enum brw_predicate pred;
+   if (dispatch_width == 8)
+      pred = BRW_PREDICATE_ALIGN1_ALL8H;
+   else if (dispatch_width == 16)
+      pred = BRW_PREDICATE_ALIGN1_ALL16H;
+   else
+      pred = BRW_PREDICATE_ALIGN1_ALL32H;
+   /* add to convergent branches if all zero (i.e. the branch is never
+    * taken)
+    */
+   inst = cbld.ADD(component(branches, 1), component(branches, 1), brw_imm_ud(1));
+   inst->predicate = pred;
+
+   /* move condition into f0 */
+   inst = bld.MOV(bld.null_reg_ud(), cond);
+   inst->conditional_mod = BRW_CONDITIONAL_NZ;
+   /* add to convergent branches if all true (i.e. the branch is always
+    * taken
+    */
+   inst = cbld.ADD(component(branches, 1), component(branches, 1), brw_imm_ud(1));
+   inst->predicate = pred;
+   cbld.ADD(component(branches, 0), component(branches, 0), brw_imm_ud(1));
+}
+
 void
 fs_visitor::nir_emit_if(nir_if *if_stmt)
 {
    fs_reg cond = get_nir_src(if_stmt->condition);
 
    if (shader_time_index >= 0) {
-      const fs_builder cbld = bld.exec_all().group(1, 0);
-      /* force non-executing channels to true in f0 so we ignore them */
-      cbld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
-      if (dispatch_width == 32)
-         cbld.MOV(brw_flag_reg(0, 1), brw_imm_uw(0xffff));
-
-      /* move !condition into f0 */
-      fs_inst *inst = bld.MOV(bld.null_reg_ud(), cond);
-      inst->conditional_mod = BRW_CONDITIONAL_Z;
-
-      enum brw_predicate pred;
-      if (dispatch_width == 8)
-         pred = BRW_PREDICATE_ALIGN1_ALL8H;
-      else if (dispatch_width == 16)
-         pred = BRW_PREDICATE_ALIGN1_ALL16H;
-      else
-         pred = BRW_PREDICATE_ALIGN1_ALL32H;
-      /* add to convergent branches if all zero (i.e. the branch is never
-       * taken)
-       */
-      inst = cbld.ADD(component(branches, 1), component(branches, 1), brw_imm_ud(1));
-      inst->predicate = pred;
-
-      /* move condition into f0 */
-      inst = bld.MOV(bld.null_reg_ud(), cond);
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
-      /* add to convergent branches if all true (i.e. the branch is always
-       * taken
-       */
-      inst = cbld.ADD(component(branches, 1), component(branches, 1), brw_imm_ud(1));
-      inst->predicate = pred;
-      cbld.ADD(component(branches, 0), component(branches, 0), brw_imm_ud(1));
+      emit_shader_time_branch(cond);
    } else {
       fs_inst *inst = bld.MOV(bld.null_reg_d(),
                               retype(cond,
@@ -3172,7 +3178,10 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
        */
       fs_inst *cmp;
       if (instr->intrinsic == nir_intrinsic_discard_if) {
-         cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]),
+         fs_reg cond = get_nir_src(instr->src[0]);
+         if (shader_time_index >= 0)
+            emit_shader_time_branch(cond);
+         cmp = bld.CMP(bld.null_reg_f(), cond,
                        brw_imm_d(0), BRW_CONDITIONAL_Z);
       } else {
          fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
-- 
cgit v1.2.3