diff options
author | Francisco Jerez <currojerez@riseup.net> | 2019-07-01 13:46:00 -0700 |
---|---|---|
committer | Francisco Jerez <currojerez@riseup.net> | 2019-08-22 06:39:54 -0700 |
commit | 7dc67176a5ad0df95d12df8a0034eb08f6df848f (patch) | |
tree | 89cb808832d007126ee2795999842f7a6042581c | |
parent | 5910492e76f9e2a2b51c5e3595138e9857bc3ba2 (diff) |
WIP: intel/fs: Rework discard handling. (branch: for-felix)
-rw-r--r-- | src/intel/compiler/brw_disasm.c | 9 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu.c | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu.h | 11 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu_compact.c | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu_defines.h | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu_emit.c | 36 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 38 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.h | 3 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_builder.h | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_generator.cpp | 139 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 42 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_visitor.cpp | 5 | ||||
-rw-r--r-- | src/intel/compiler/brw_reg.h | 15 | ||||
-rw-r--r-- | src/intel/compiler/brw_schedule_instructions.cpp | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_shader.cpp | 4 |
15 files changed, 192 insertions, 120 deletions
diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index 8b7047db00f..b53aff9594f 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -44,7 +44,8 @@ has_jip(const struct gen_device_info *devinfo, enum opcode opcode) opcode == BRW_OPCODE_WHILE || opcode == BRW_OPCODE_BREAK || opcode == BRW_OPCODE_CONTINUE || - opcode == BRW_OPCODE_HALT; + opcode == BRW_OPCODE_HALT || + opcode == BRW_OPCODE_BRC; } static bool @@ -57,7 +58,8 @@ has_uip(const struct gen_device_info *devinfo, enum opcode opcode) (devinfo->gen >= 8 && opcode == BRW_OPCODE_ELSE) || opcode == BRW_OPCODE_BREAK || opcode == BRW_OPCODE_CONTINUE || - opcode == BRW_OPCODE_HALT; + opcode == BRW_OPCODE_HALT || + opcode == BRW_OPCODE_BRC; } static bool @@ -697,6 +699,9 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr) format(file, "mask%d", _reg_nr & 0x0f); break; case BRW_ARF_MASK_STACK: + format(file, "ms%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK_DEPTH: format(file, "msd%d", _reg_nr & 0x0f); break; case BRW_ARF_STATE: diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c index 882293e981b..fe865057025 100644 --- a/src/intel/compiler/brw_eu.c +++ b/src/intel/compiler/brw_eu.c @@ -341,6 +341,8 @@ brw_init_codegen(const struct gen_device_info *devinfo, p->loop_stack_array_size = 16; p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); + + p->exit_insn_offset = ~0; } diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index cb23c9ff51a..013c34bca21 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -132,6 +132,14 @@ struct brw_codegen { int *if_depth_in_loop; int loop_stack_depth; int loop_stack_array_size; + + /** + * Instruction offset the EU will be made to jump to in the case of a + * uniform HALT condition. 
This will typically point to a short sequence + * of instructions used to read out the final active channel mask and + * optionally re-enable any disabled channels. + */ + unsigned exit_insn_offset; }; void brw_pop_insn_state( struct brw_codegen *p ); @@ -1067,7 +1075,8 @@ brw_inst *brw_WHILE(struct brw_codegen *p); brw_inst *brw_BREAK(struct brw_codegen *p); brw_inst *brw_CONT(struct brw_codegen *p); -brw_inst *gen6_HALT(struct brw_codegen *p); +brw_inst *brw_HALT(struct brw_codegen *p); +brw_inst *gen7_BRC(struct brw_codegen *p); /* Forward jumps: */ diff --git a/src/intel/compiler/brw_eu_compact.c b/src/intel/compiler/brw_eu_compact.c index daebdca8e37..a0934fcfa91 100644 --- a/src/intel/compiler/brw_eu_compact.c +++ b/src/intel/compiler/brw_eu_compact.c @@ -1628,7 +1628,7 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset, break; case BRW_OPCODE_IF: - case BRW_OPCODE_IFF: + case BRW_OPCODE_IFF: /* Also BRW_OPCODE_BRC */ case BRW_OPCODE_ELSE: case BRW_OPCODE_ENDIF: case BRW_OPCODE_WHILE: diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index b33ea6deee1..a358b8c2ecb 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -550,9 +550,9 @@ enum opcode { FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, FS_OPCODE_DISCARD_JUMP, + FS_OPCODE_DISCARD_LANDING_PAD, FS_OPCODE_SET_SAMPLE_ID, FS_OPCODE_PACK_HALF_2x16_SPLIT, - FS_OPCODE_PLACEHOLDER_HALT, FS_OPCODE_INTERPOLATE_AT_SAMPLE, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 60761e83c62..fe0ccf2e1f5 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -1671,7 +1671,7 @@ brw_CONT(struct brw_codegen *p) } brw_inst * -gen6_HALT(struct brw_codegen *p) +brw_HALT(struct brw_codegen *p) { const struct gen_device_info *devinfo = p->devinfo; 
brw_inst *insn; @@ -1680,9 +1680,13 @@ gen6_HALT(struct brw_codegen *p) brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); if (devinfo->gen >= 8) { brw_set_src0(p, insn, brw_imm_d(0x0)); - } else { + } else if (devinfo->gen >= 6) { brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ + } else { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); } brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); @@ -1690,6 +1694,24 @@ gen6_HALT(struct brw_codegen *p) return insn; } +brw_inst * +gen7_BRC(struct brw_codegen *p) +{ + const struct gen_device_info *devinfo = p->devinfo; + brw_inst *insn = next_insn(p, BRW_OPCODE_BRC); + + if (devinfo->gen >= 8) { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, brw_imm_d(0x0)); + } else { + assert(devinfo->gen >= 7); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } + return insn; +} + /* DO/WHILE loop: * * The DO/WHILE is just an unterminated loop -- break or continue are @@ -2710,6 +2732,9 @@ brw_find_next_block_end(struct brw_codegen *p, int start_offset) offset = next_offset(devinfo, store, offset)) { brw_inst *insn = store + offset; + if (offset == p->exit_insn_offset) + return offset; + switch (brw_inst_opcode(devinfo, insn)) { case BRW_OPCODE_IF: depth++; @@ -2727,7 +2752,6 @@ brw_find_next_block_end(struct brw_codegen *p, int start_offset) continue; /* fallthrough */ case BRW_OPCODE_ELSE: - case BRW_OPCODE_HALT: if (depth == 0) return offset; } @@ -2835,6 +2859,12 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset) assert(brw_inst_uip(devinfo, insn) != 0); assert(brw_inst_jip(devinfo, insn) != 0); break; + + case BRW_OPCODE_BRC: + brw_inst_set_jip(devinfo, insn, 
+ block_end_offset == 0 ? brw_inst_uip(devinfo, insn) : + (block_end_offset - offset) / scale); + break; } } } diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 5c225fb0a3b..78994216b12 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1071,7 +1071,8 @@ fs_inst::flags_written() const opcode != BRW_OPCODE_IF && opcode != BRW_OPCODE_WHILE)) || opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL || - opcode == FS_OPCODE_FB_WRITE) { + opcode == FS_OPCODE_FB_WRITE || + opcode == FS_OPCODE_DISCARD_LANDING_PAD) { return flag_mask(this); } else { return flag_mask(dst, size_written); @@ -1499,21 +1500,6 @@ fs_visitor::resolve_source_modifiers(const fs_reg &src) } void -fs_visitor::emit_discard_jump() -{ - assert(brw_wm_prog_data(this->prog_data)->uses_kill); - - /* For performance, after a discard, jump to the end of the - * shader if all relevant channels have been discarded. - */ - fs_inst *discard_jump = bld.emit(FS_OPCODE_DISCARD_JUMP); - discard_jump->flag_subreg = 1; - - discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H; - discard_jump->predicate_inverse = true; -} - -void fs_visitor::emit_gs_thread_end() { assert(stage == MESA_SHADER_GEOMETRY); @@ -2976,7 +2962,7 @@ fs_visitor::opt_redundant_discard_jumps() fs_inst *placeholder_halt = NULL; foreach_inst_in_block_reverse(fs_inst, inst, last_bblock) { - if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT) { + if (inst->opcode == FS_OPCODE_DISCARD_LANDING_PAD) { placeholder_halt = inst; break; } @@ -4232,10 +4218,9 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, } if (prog_data->uses_kill) { - assert(bld.group() < 16); ubld.group(1, 0).MOV(retype(component(header, 15), BRW_REGISTER_TYPE_UW), - brw_flag_reg(0, 1)); + brw_flag_subreg(inst->flag_subreg + inst->group / 16)); } assert(length == 0); @@ -7100,8 +7085,6 @@ fs_visitor::optimize() OPT(opt_peephole_sel); } - OPT(opt_redundant_discard_jumps); - if (OPT(lower_load_payload)) { 
split_virtual_grfs(); OPT(register_coalesce); @@ -7616,24 +7599,13 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) emit_interpolation_setup_gen6(); } - /* We handle discards by keeping track of the still-live pixels in f0.1. - * Initialize it with the dispatched pixels. - */ - if (wm_prog_data->uses_kill) { - const fs_reg dispatch_mask = - devinfo->gen >= 6 ? brw_vec1_grf(1, 7) : brw_vec1_grf(0, 0); - bld.exec_all().group(1, 0) - .MOV(retype(brw_flag_reg(0, 1), BRW_REGISTER_TYPE_UW), - retype(dispatch_mask, BRW_REGISTER_TYPE_UW)); - } - emit_nir_code(); if (failed) return false; if (wm_prog_data->uses_kill) - bld.emit(FS_OPCODE_PLACEHOLDER_HALT); + bld.emit(FS_OPCODE_DISCARD_LANDING_PAD); if (wm_key->alpha_test_func) emit_alpha_test(); diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 4b81c65553f..82dc9354c8d 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -185,7 +185,6 @@ public: const fs_reg &texture_handle); void emit_gen6_gather_wa(uint8_t wa, fs_reg dst); fs_reg resolve_source_modifiers(const fs_reg &src); - void emit_discard_jump(); void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr, fs_reg result, fs_reg *op, unsigned fsign_src); bool opt_peephole_sel(); @@ -513,7 +512,7 @@ private: struct brw_reg dst, struct brw_reg src, unsigned swiz); - bool patch_discard_jumps_to_fb_writes(); + void patch_discard_jumps_to_landing_pad(const fs_inst *inst); const struct brw_compiler *compiler; void *log_data; /* Passed to compiler->*_log functions */ diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 70f6e795e70..9a01b9aef43 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -251,8 +251,6 @@ namespace brw { { if (shader->stage != MESA_SHADER_FRAGMENT) { return brw_imm_d(0xffffffff); - } else if (brw_wm_prog_data(shader->stage_prog_data)->uses_kill) { - return brw_flag_reg(0, 1); } else { 
assert(shader->devinfo->gen >= 6 && dispatch_width() <= 16); return retype(brw_vec1_grf((_group >= 16 ? 2 : 1), 7), diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 10a12eafc76..7d891c21a5b 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -221,41 +221,109 @@ public: int ip; }; -bool -fs_generator::patch_discard_jumps_to_fb_writes() +void +fs_generator::patch_discard_jumps_to_landing_pad(const fs_inst *inst) { - if (devinfo->gen < 6 || this->discard_halt_patches.is_empty()) - return false; + const int scale = brw_jump_scale(p->devinfo); - int scale = brw_jump_scale(p->devinfo); + p->exit_insn_offset = p->next_insn_offset; - /* There is a somewhat strange undocumented requirement of using - * HALT, according to the simulator. If some channel has HALTed to - * a particular UIP, then by the end of the program, every channel - * must have HALTed to that UIP. Furthermore, the tracking is a - * stack, so you can't do the final halt of a UIP after starting - * halting to a new UIP. - * - * Symptoms of not emitting this instruction on actual hardware - * included GPU hangs and sparkly rendering on the piglit discard - * tests. - */ - brw_inst *last_halt = gen6_HALT(p); - brw_inst_set_uip(p->devinfo, last_halt, 1 * scale); - brw_inst_set_jip(p->devinfo, last_halt, 1 * scale); + if (devinfo->gen >= 8) { + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); - int ip = p->nr_insn; + const brw_reg_type t = (inst->exec_size > 16 ? BRW_REGISTER_TYPE_UD : + BRW_REGISTER_TYPE_UW); + + brw_MOV(p, retype(brw_flag_reg(0, inst->flag_subreg), t), + retype(brw_mask_reg(0), t)); + + brw_pop_insn_state(p); + + } else if (devinfo->gen >= 6) { + const brw_reg_type t = (inst->exec_size > 16 ? 
BRW_REGISTER_TYPE_UD : + BRW_REGISTER_TYPE_UW); + brw_inst *zero = brw_MOV(p, retype(brw_flag_reg(0, inst->flag_subreg), t), + brw_imm_uw(0)); + brw_inst_set_exec_size(devinfo, zero, BRW_EXECUTE_1); + brw_inst_set_mask_control(devinfo, zero, BRW_MASK_DISABLE); + + brw_inst *mov = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), + brw_imm_uw(0)); + brw_inst_set_cond_modifier(devinfo, mov, BRW_CONDITIONAL_Z); + brw_inst_set_flag_subreg_nr(devinfo, mov, inst->flag_subreg); + } + if (devinfo->gen >= 7) { + /* If some channel has BRCed to a particular UIP, then by the + * end of the program, every channel must have BRCed to that + * UIP. Furthermore, the tracking is a stack, so you can't do + * the final branch to a UIP after starting branching to a new + * UIP. + */ + brw_inst *last_converging = gen7_BRC(p); + brw_inst_set_uip(p->devinfo, last_converging, scale); + } + + const int ip = p->nr_insn; foreach_in_list(ip_record, patch_ip, &discard_halt_patches) { brw_inst *patch = &p->store[patch_ip->ip]; - assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT); + assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT || + brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_BRC); + /* HALT takes a half-instruction distance from the pre-incremented IP. 
*/ - brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale); + if (devinfo->gen >= 6) + brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale); + else + brw_inst_set_gen4_jump_count(devinfo, patch, + (ip - patch_ip->ip) * scale); + } + + this->discard_halt_patches.make_empty(); + + if (devinfo->gen < 6) { + brw_inst *fetch = brw_AND(p, brw_flag_reg(0, inst->flag_subreg), + brw_mask_reg(0 /* AMASK */), + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW)); + brw_inst_set_exec_size(devinfo, fetch, BRW_EXECUTE_1); + brw_inst_set_mask_control(devinfo, fetch, BRW_MASK_DISABLE); + brw_inst_set_qtr_control(devinfo, fetch, BRW_COMPRESSION_NONE); + brw_inst_set_thread_control(devinfo, fetch, BRW_THREAD_SWITCH); } - this->discard_halt_patches.make_empty(); - return true; + if (devinfo->gen == 4 && !devinfo->is_g4x) { + /* Workaround for the following: + * + * [DevBW, DevCL] Erratum: The subfields in mask stack register are + * reset to zero during graphics reset, however, they are not + * initialized at thread dispatch. These subfields will retain the + * values from the previous thread. Software should make sure the mask + * stack is empty (reset to zero) before terminating the thread. In case + * that this is not practical, software may have to reset the mask stack + * at the beginning of each kernel, which will impact the performance. + * + * Luckily we can rely on: + * + * [DevBW, DevCL] This register access restriction is not applicable, + * hardware does ensure execution pipeline coherency, when a mask stack + * register is used as an explicit source and/or destination. + */ + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + + brw_set_default_exec_size(p, BRW_EXECUTE_2); + brw_MOV(p, vec2(brw_mask_stack_depth_reg(0)), brw_imm_uw(0)); + + brw_set_default_exec_size(p, BRW_EXECUTE_16); + /* Reset the if stack. 
*/ + brw_MOV(p, retype(brw_mask_stack_reg(0), BRW_REGISTER_TYPE_UW), + brw_imm_uw(0)); + + brw_pop_insn_state(p); + } } void @@ -1332,14 +1400,17 @@ fs_generator::generate_ddy(const fs_inst *inst, void fs_generator::generate_discard_jump(fs_inst *) { - assert(devinfo->gen >= 6); - - /* This HALT will be patched up at FB write time to point UIP at the end of - * the program, and at brw_uip_jip() JIP will be set to the end of the - * current block (or the program). + /* On Gen6+ This HALT will be patched up to point UIP at the placeholder + * HALT instruction in the discard landing pad, and at brw_set_uip_jip() + * JIP will be set to the end of the current block (or the beginning of the + * discard landing pad). */ this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn)); - gen6_HALT(p); + + if (devinfo->gen >= 7) + gen7_BRC(p); + else + brw_HALT(p); } void @@ -2198,15 +2269,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_pack_half_2x16_split(inst, dst, src[0], src[1]); break; - case FS_OPCODE_PLACEHOLDER_HALT: + case FS_OPCODE_DISCARD_LANDING_PAD: /* This is the place where the final HALT needs to be inserted if * we've emitted any discards. If not, this will emit no code. */ - if (!patch_discard_jumps_to_fb_writes()) { - if (unlikely(debug_flag)) { - disasm_info->use_tail = true; - } - } + patch_discard_jumps_to_landing_pad(inst); break; case FS_OPCODE_INTERPOLATE_AT_SAMPLE: diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index f6c209b77e4..3456d974b6b 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3352,17 +3352,14 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, } case nir_intrinsic_demote: - case nir_intrinsic_discard: case nir_intrinsic_demote_if: + abort(); + + case nir_intrinsic_discard: case nir_intrinsic_discard_if: { - /* We track our discarded pixels in f0.1. 
By predicating on it, we can - * update just the flag bits that aren't yet discarded. If there's no - * condition, we emit a CMP of g0 != g0, so all currently executing - * channels will get turned off. - */ fs_inst *cmp = NULL; - if (instr->intrinsic == nir_intrinsic_demote_if || - instr->intrinsic == nir_intrinsic_discard_if) { + + if (instr->intrinsic == nir_intrinsic_discard_if) { nir_alu_instr *alu = nir_src_as_alu_instr(instr->src[0]); if (alu != NULL && @@ -3387,40 +3384,21 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, cmp = (fs_inst *) instructions.get_tail(); if (cmp->conditional_mod == BRW_CONDITIONAL_NONE) { if (cmp->can_do_cmod()) - cmp->conditional_mod = BRW_CONDITIONAL_Z; + cmp->conditional_mod = BRW_CONDITIONAL_NZ; else cmp = NULL; - } else { - /* The old sequence that would have been generated is, - * basically, bool_result == false. This is equivalent to - * !bool_result, so negate the old modifier. - */ - cmp->conditional_mod = brw_negate_cmod(cmp->conditional_mod); } } if (cmp == NULL) { cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]), - brw_imm_d(0), BRW_CONDITIONAL_Z); + brw_imm_d(0), BRW_CONDITIONAL_NZ); } - } else { - fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), - BRW_REGISTER_TYPE_UW)); - cmp = bld.CMP(bld.null_reg_f(), some_reg, some_reg, BRW_CONDITIONAL_NZ); - } - - cmp->predicate = BRW_PREDICATE_NORMAL; - cmp->flag_subreg = 1; - - if (devinfo->gen >= 6) { - /* Due to the way we implement discard, the jump will only happen - * when the whole quad is discarded. So we can do this even for - * demote as it won't break its uniformity promises. 
- */ - emit_discard_jump(); } - limit_dispatch_width(16, "Fragment discard/demote not implemented in SIMD32 mode."); + fs_inst *jump = bld.emit(FS_OPCODE_DISCARD_JUMP); + if (instr->intrinsic == nir_intrinsic_discard_if) + set_predicate(BRW_PREDICATE_NORMAL, jump); break; } diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 76d15acbca8..6a9794ca8bc 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -359,7 +359,6 @@ fs_visitor::emit_alpha_test() cond_for_alpha_func(key->alpha_test_func)); } cmp->predicate = BRW_PREDICATE_NORMAL; - cmp->flag_subreg = 1; } fs_inst * @@ -393,10 +392,8 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(), sources, ARRAY_SIZE(sources)); - if (prog_data->uses_kill) { + if (prog_data->uses_kill) write->predicate = BRW_PREDICATE_NORMAL; - write->flag_subreg = 1; - } return write; } diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h index 4543d841c66..94d053e5ce4 100644 --- a/src/intel/compiler/brw_reg.h +++ b/src/intel/compiler/brw_reg.h @@ -939,6 +939,21 @@ brw_dmask_reg() } static inline struct brw_reg +brw_mask_stack_reg(unsigned subnr) +{ + return suboffset(retype(brw_vec16_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK_STACK, 0), + BRW_REGISTER_TYPE_UB), subnr); +} + +static inline struct brw_reg +brw_mask_stack_depth_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK_STACK_DEPTH, subnr); +} + +static inline struct brw_reg brw_message_reg(unsigned nr) { return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 4fed1492ff7..b07f941e004 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -1019,7 +1019,7 @@ 
instruction_scheduler::add_dep(schedule_node *before, schedule_node *after) static bool is_scheduling_barrier(const backend_instruction *inst) { - return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT || + return inst->opcode == FS_OPCODE_DISCARD_LANDING_PAD || inst->is_control_flow() || inst->has_side_effects(); } diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index e6f6f827c44..3ade32ac865 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -427,8 +427,8 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case FS_OPCODE_PACK_HALF_2x16_SPLIT: return "pack_half_2x16_split"; - case FS_OPCODE_PLACEHOLDER_HALT: - return "placeholder_halt"; + case FS_OPCODE_DISCARD_LANDING_PAD: + return "discard_landing_pad"; case FS_OPCODE_INTERPOLATE_AT_SAMPLE: return "interp_sample"; |