summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net>, 2019-07-01 13:46:00 -0700
committer: Francisco Jerez <currojerez@riseup.net>, 2019-08-22 06:39:54 -0700
commit: 7dc67176a5ad0df95d12df8a0034eb08f6df848f (patch)
tree: 89cb808832d007126ee2795999842f7a6042581c
parent: 5910492e76f9e2a2b51c5e3595138e9857bc3ba2 (diff)
WIP: intel/fs: Rework discard handling. [branch: for-felix]
-rw-r--r-- src/intel/compiler/brw_disasm.c | 9
-rw-r--r-- src/intel/compiler/brw_eu.c | 2
-rw-r--r-- src/intel/compiler/brw_eu.h | 11
-rw-r--r-- src/intel/compiler/brw_eu_compact.c | 2
-rw-r--r-- src/intel/compiler/brw_eu_defines.h | 2
-rw-r--r-- src/intel/compiler/brw_eu_emit.c | 36
-rw-r--r-- src/intel/compiler/brw_fs.cpp | 38
-rw-r--r-- src/intel/compiler/brw_fs.h | 3
-rw-r--r-- src/intel/compiler/brw_fs_builder.h | 2
-rw-r--r-- src/intel/compiler/brw_fs_generator.cpp | 139
-rw-r--r-- src/intel/compiler/brw_fs_nir.cpp | 42
-rw-r--r-- src/intel/compiler/brw_fs_visitor.cpp | 5
-rw-r--r-- src/intel/compiler/brw_reg.h | 15
-rw-r--r-- src/intel/compiler/brw_schedule_instructions.cpp | 2
-rw-r--r-- src/intel/compiler/brw_shader.cpp | 4
15 files changed, 192 insertions, 120 deletions
diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c
index 8b7047db00f..b53aff9594f 100644
--- a/src/intel/compiler/brw_disasm.c
+++ b/src/intel/compiler/brw_disasm.c
@@ -44,7 +44,8 @@ has_jip(const struct gen_device_info *devinfo, enum opcode opcode)
opcode == BRW_OPCODE_WHILE ||
opcode == BRW_OPCODE_BREAK ||
opcode == BRW_OPCODE_CONTINUE ||
- opcode == BRW_OPCODE_HALT;
+ opcode == BRW_OPCODE_HALT ||
+ opcode == BRW_OPCODE_BRC;
}
static bool
@@ -57,7 +58,8 @@ has_uip(const struct gen_device_info *devinfo, enum opcode opcode)
(devinfo->gen >= 8 && opcode == BRW_OPCODE_ELSE) ||
opcode == BRW_OPCODE_BREAK ||
opcode == BRW_OPCODE_CONTINUE ||
- opcode == BRW_OPCODE_HALT;
+ opcode == BRW_OPCODE_HALT ||
+ opcode == BRW_OPCODE_BRC;
}
static bool
@@ -697,6 +699,9 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
format(file, "mask%d", _reg_nr & 0x0f);
break;
case BRW_ARF_MASK_STACK:
+ format(file, "ms%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_MASK_STACK_DEPTH:
format(file, "msd%d", _reg_nr & 0x0f);
break;
case BRW_ARF_STATE:
diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c
index 882293e981b..fe865057025 100644
--- a/src/intel/compiler/brw_eu.c
+++ b/src/intel/compiler/brw_eu.c
@@ -341,6 +341,8 @@ brw_init_codegen(const struct gen_device_info *devinfo,
p->loop_stack_array_size = 16;
p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
+
+ p->exit_insn_offset = ~0;
}
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index cb23c9ff51a..013c34bca21 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -132,6 +132,14 @@ struct brw_codegen {
int *if_depth_in_loop;
int loop_stack_depth;
int loop_stack_array_size;
+
+ /**
+ * Instruction offset the EU will be made to jump to in the case of a
+ * uniform HALT condition. This will typically point to a short sequence
+ * of instructions used to read out the final active channel mask and
+ * optionally re-enable any disabled channels.
+ */
+ unsigned exit_insn_offset;
};
void brw_pop_insn_state( struct brw_codegen *p );
@@ -1067,7 +1075,8 @@ brw_inst *brw_WHILE(struct brw_codegen *p);
brw_inst *brw_BREAK(struct brw_codegen *p);
brw_inst *brw_CONT(struct brw_codegen *p);
-brw_inst *gen6_HALT(struct brw_codegen *p);
+brw_inst *brw_HALT(struct brw_codegen *p);
+brw_inst *gen7_BRC(struct brw_codegen *p);
/* Forward jumps:
*/
diff --git a/src/intel/compiler/brw_eu_compact.c b/src/intel/compiler/brw_eu_compact.c
index daebdca8e37..a0934fcfa91 100644
--- a/src/intel/compiler/brw_eu_compact.c
+++ b/src/intel/compiler/brw_eu_compact.c
@@ -1628,7 +1628,7 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
break;
case BRW_OPCODE_IF:
- case BRW_OPCODE_IFF:
+ case BRW_OPCODE_IFF: /* Also BRW_OPCODE_BRC */
case BRW_OPCODE_ELSE:
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_WHILE:
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index b33ea6deee1..a358b8c2ecb 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -550,9 +550,9 @@ enum opcode {
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
FS_OPCODE_DISCARD_JUMP,
+ FS_OPCODE_DISCARD_LANDING_PAD,
FS_OPCODE_SET_SAMPLE_ID,
FS_OPCODE_PACK_HALF_2x16_SPLIT,
- FS_OPCODE_PLACEHOLDER_HALT,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 60761e83c62..fe0ccf2e1f5 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -1671,7 +1671,7 @@ brw_CONT(struct brw_codegen *p)
}
brw_inst *
-gen6_HALT(struct brw_codegen *p)
+brw_HALT(struct brw_codegen *p)
{
const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
@@ -1680,9 +1680,13 @@ gen6_HALT(struct brw_codegen *p)
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
if (devinfo->gen >= 8) {
brw_set_src0(p, insn, brw_imm_d(0x0));
- } else {
+ } else if (devinfo->gen >= 6) {
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+ } else {
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0));
}
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
@@ -1690,6 +1694,24 @@ gen6_HALT(struct brw_codegen *p)
return insn;
}
+brw_inst *
+gen7_BRC(struct brw_codegen *p)
+{
+ const struct gen_device_info *devinfo = p->devinfo;
+ brw_inst *insn = next_insn(p, BRW_OPCODE_BRC);
+
+ if (devinfo->gen >= 8) {
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, brw_imm_d(0x0));
+ } else {
+ assert(devinfo->gen >= 7);
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+ }
+ return insn;
+}
+
/* DO/WHILE loop:
*
* The DO/WHILE is just an unterminated loop -- break or continue are
@@ -2710,6 +2732,9 @@ brw_find_next_block_end(struct brw_codegen *p, int start_offset)
offset = next_offset(devinfo, store, offset)) {
brw_inst *insn = store + offset;
+ if (offset == p->exit_insn_offset)
+ return offset;
+
switch (brw_inst_opcode(devinfo, insn)) {
case BRW_OPCODE_IF:
depth++;
@@ -2727,7 +2752,6 @@ brw_find_next_block_end(struct brw_codegen *p, int start_offset)
continue;
/* fallthrough */
case BRW_OPCODE_ELSE:
- case BRW_OPCODE_HALT:
if (depth == 0)
return offset;
}
@@ -2835,6 +2859,12 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset)
assert(brw_inst_uip(devinfo, insn) != 0);
assert(brw_inst_jip(devinfo, insn) != 0);
break;
+
+ case BRW_OPCODE_BRC:
+ brw_inst_set_jip(devinfo, insn,
+ block_end_offset == 0 ? brw_inst_uip(devinfo, insn) :
+ (block_end_offset - offset) / scale);
+ break;
}
}
}
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 5c225fb0a3b..78994216b12 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1071,7 +1071,8 @@ fs_inst::flags_written() const
opcode != BRW_OPCODE_IF &&
opcode != BRW_OPCODE_WHILE)) ||
opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL ||
- opcode == FS_OPCODE_FB_WRITE) {
+ opcode == FS_OPCODE_FB_WRITE ||
+ opcode == FS_OPCODE_DISCARD_LANDING_PAD) {
return flag_mask(this);
} else {
return flag_mask(dst, size_written);
@@ -1499,21 +1500,6 @@ fs_visitor::resolve_source_modifiers(const fs_reg &src)
}
void
-fs_visitor::emit_discard_jump()
-{
- assert(brw_wm_prog_data(this->prog_data)->uses_kill);
-
- /* For performance, after a discard, jump to the end of the
- * shader if all relevant channels have been discarded.
- */
- fs_inst *discard_jump = bld.emit(FS_OPCODE_DISCARD_JUMP);
- discard_jump->flag_subreg = 1;
-
- discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
- discard_jump->predicate_inverse = true;
-}
-
-void
fs_visitor::emit_gs_thread_end()
{
assert(stage == MESA_SHADER_GEOMETRY);
@@ -2976,7 +2962,7 @@ fs_visitor::opt_redundant_discard_jumps()
fs_inst *placeholder_halt = NULL;
foreach_inst_in_block_reverse(fs_inst, inst, last_bblock) {
- if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT) {
+ if (inst->opcode == FS_OPCODE_DISCARD_LANDING_PAD) {
placeholder_halt = inst;
break;
}
@@ -4232,10 +4218,9 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
}
if (prog_data->uses_kill) {
- assert(bld.group() < 16);
ubld.group(1, 0).MOV(retype(component(header, 15),
BRW_REGISTER_TYPE_UW),
- brw_flag_reg(0, 1));
+ brw_flag_subreg(inst->flag_subreg + inst->group / 16));
}
assert(length == 0);
@@ -7100,8 +7085,6 @@ fs_visitor::optimize()
OPT(opt_peephole_sel);
}
- OPT(opt_redundant_discard_jumps);
-
if (OPT(lower_load_payload)) {
split_virtual_grfs();
OPT(register_coalesce);
@@ -7616,24 +7599,13 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
emit_interpolation_setup_gen6();
}
- /* We handle discards by keeping track of the still-live pixels in f0.1.
- * Initialize it with the dispatched pixels.
- */
- if (wm_prog_data->uses_kill) {
- const fs_reg dispatch_mask =
- devinfo->gen >= 6 ? brw_vec1_grf(1, 7) : brw_vec1_grf(0, 0);
- bld.exec_all().group(1, 0)
- .MOV(retype(brw_flag_reg(0, 1), BRW_REGISTER_TYPE_UW),
- retype(dispatch_mask, BRW_REGISTER_TYPE_UW));
- }
-
emit_nir_code();
if (failed)
return false;
if (wm_prog_data->uses_kill)
- bld.emit(FS_OPCODE_PLACEHOLDER_HALT);
+ bld.emit(FS_OPCODE_DISCARD_LANDING_PAD);
if (wm_key->alpha_test_func)
emit_alpha_test();
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 4b81c65553f..82dc9354c8d 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -185,7 +185,6 @@ public:
const fs_reg &texture_handle);
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
fs_reg resolve_source_modifiers(const fs_reg &src);
- void emit_discard_jump();
void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
fs_reg result, fs_reg *op, unsigned fsign_src);
bool opt_peephole_sel();
@@ -513,7 +512,7 @@ private:
struct brw_reg dst, struct brw_reg src,
unsigned swiz);
- bool patch_discard_jumps_to_fb_writes();
+ void patch_discard_jumps_to_landing_pad(const fs_inst *inst);
const struct brw_compiler *compiler;
void *log_data; /* Passed to compiler->*_log functions */
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 70f6e795e70..9a01b9aef43 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -251,8 +251,6 @@ namespace brw {
{
if (shader->stage != MESA_SHADER_FRAGMENT) {
return brw_imm_d(0xffffffff);
- } else if (brw_wm_prog_data(shader->stage_prog_data)->uses_kill) {
- return brw_flag_reg(0, 1);
} else {
assert(shader->devinfo->gen >= 6 && dispatch_width() <= 16);
return retype(brw_vec1_grf((_group >= 16 ? 2 : 1), 7),
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 10a12eafc76..7d891c21a5b 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -221,41 +221,109 @@ public:
int ip;
};
-bool
-fs_generator::patch_discard_jumps_to_fb_writes()
+void
+fs_generator::patch_discard_jumps_to_landing_pad(const fs_inst *inst)
{
- if (devinfo->gen < 6 || this->discard_halt_patches.is_empty())
- return false;
+ const int scale = brw_jump_scale(p->devinfo);
- int scale = brw_jump_scale(p->devinfo);
+ p->exit_insn_offset = p->next_insn_offset;
- /* There is a somewhat strange undocumented requirement of using
- * HALT, according to the simulator. If some channel has HALTed to
- * a particular UIP, then by the end of the program, every channel
- * must have HALTed to that UIP. Furthermore, the tracking is a
- * stack, so you can't do the final halt of a UIP after starting
- * halting to a new UIP.
- *
- * Symptoms of not emitting this instruction on actual hardware
- * included GPU hangs and sparkly rendering on the piglit discard
- * tests.
- */
- brw_inst *last_halt = gen6_HALT(p);
- brw_inst_set_uip(p->devinfo, last_halt, 1 * scale);
- brw_inst_set_jip(p->devinfo, last_halt, 1 * scale);
+ if (devinfo->gen >= 8) {
+ brw_push_insn_state(p);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- int ip = p->nr_insn;
+ const brw_reg_type t = (inst->exec_size > 16 ? BRW_REGISTER_TYPE_UD :
+ BRW_REGISTER_TYPE_UW);
+
+ brw_MOV(p, retype(brw_flag_reg(0, inst->flag_subreg), t),
+ retype(brw_mask_reg(0), t));
+
+ brw_pop_insn_state(p);
+
+ } else if (devinfo->gen >= 6) {
+ const brw_reg_type t = (inst->exec_size > 16 ? BRW_REGISTER_TYPE_UD :
+ BRW_REGISTER_TYPE_UW);
+ brw_inst *zero = brw_MOV(p, retype(brw_flag_reg(0, inst->flag_subreg), t),
+ brw_imm_uw(0));
+ brw_inst_set_exec_size(devinfo, zero, BRW_EXECUTE_1);
+ brw_inst_set_mask_control(devinfo, zero, BRW_MASK_DISABLE);
+
+ brw_inst *mov = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW),
+ brw_imm_uw(0));
+ brw_inst_set_cond_modifier(devinfo, mov, BRW_CONDITIONAL_Z);
+ brw_inst_set_flag_subreg_nr(devinfo, mov, inst->flag_subreg);
+ }
+ if (devinfo->gen >= 7) {
+ /* If some channel has BRCed to a particular UIP, then by the
+ * end of the program, every channel must have BRCed to that
+ * UIP. Furthermore, the tracking is a stack, so you can't do
+ * the final branch to a UIP after starting branching to a new
+ * UIP.
+ */
+ brw_inst *last_converging = gen7_BRC(p);
+ brw_inst_set_uip(p->devinfo, last_converging, scale);
+ }
+
+ const int ip = p->nr_insn;
foreach_in_list(ip_record, patch_ip, &discard_halt_patches) {
brw_inst *patch = &p->store[patch_ip->ip];
- assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT);
+ assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT ||
+ brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_BRC);
+
/* HALT takes a half-instruction distance from the pre-incremented IP. */
- brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
+ if (devinfo->gen >= 6)
+ brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
+ else
+ brw_inst_set_gen4_jump_count(devinfo, patch,
+ (ip - patch_ip->ip) * scale);
+ }
+
+ this->discard_halt_patches.make_empty();
+
+ if (devinfo->gen < 6) {
+ brw_inst *fetch = brw_AND(p, brw_flag_reg(0, inst->flag_subreg),
+ brw_mask_reg(0 /* AMASK */),
+ retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW));
+ brw_inst_set_exec_size(devinfo, fetch, BRW_EXECUTE_1);
+ brw_inst_set_mask_control(devinfo, fetch, BRW_MASK_DISABLE);
+ brw_inst_set_qtr_control(devinfo, fetch, BRW_COMPRESSION_NONE);
+ brw_inst_set_thread_control(devinfo, fetch, BRW_THREAD_SWITCH);
}
- this->discard_halt_patches.make_empty();
- return true;
+ if (devinfo->gen == 4 && !devinfo->is_g4x) {
+ /* Workaround for the following:
+ *
+ * [DevBW, DevCL] Erratum: The subfields in mask stack register are
+ * reset to zero during graphics reset, however, they are not
+ * initialized at thread dispatch. These subfields will retain the
+ * values from the previous thread. Software should make sure the mask
+ * stack is empty (reset to zero) before terminating the thread. In case
+ * that this is not practical, software may have to reset the mask stack
+ * at the beginning of each kernel, which will impact the performance.
+ *
+ * Luckily we can rely on:
+ *
+ * [DevBW, DevCL] This register access restriction is not applicable,
+ * hardware does ensure execution pipeline coherency, when a mask stack
+ * register is used as an explicit source and/or destination.
+ */
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_set_default_exec_size(p, BRW_EXECUTE_2);
+ brw_MOV(p, vec2(brw_mask_stack_depth_reg(0)), brw_imm_uw(0));
+
+ brw_set_default_exec_size(p, BRW_EXECUTE_16);
+ /* Reset the if stack. */
+ brw_MOV(p, retype(brw_mask_stack_reg(0), BRW_REGISTER_TYPE_UW),
+ brw_imm_uw(0));
+
+ brw_pop_insn_state(p);
+ }
}
void
@@ -1332,14 +1400,17 @@ fs_generator::generate_ddy(const fs_inst *inst,
void
fs_generator::generate_discard_jump(fs_inst *)
{
- assert(devinfo->gen >= 6);
-
- /* This HALT will be patched up at FB write time to point UIP at the end of
- * the program, and at brw_uip_jip() JIP will be set to the end of the
- * current block (or the program).
+ /* On Gen6+ this jump (a BRC on Gen7+, a HALT on Gen6) will be patched up
+ * to point UIP just past the discard landing pad code, and at
+ * brw_set_uip_jip() JIP will be set to the end of the current block (or
+ * the beginning of the discard landing pad).
*/
this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
- gen6_HALT(p);
+
+ if (devinfo->gen >= 7)
+ gen7_BRC(p);
+ else
+ brw_HALT(p);
}
void
@@ -2198,15 +2269,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
break;
- case FS_OPCODE_PLACEHOLDER_HALT:
+ case FS_OPCODE_DISCARD_LANDING_PAD:
/* This is the place where the final HALT needs to be inserted if
* we've emitted any discards. If not, this will emit no code.
*/
- if (!patch_discard_jumps_to_fb_writes()) {
- if (unlikely(debug_flag)) {
- disasm_info->use_tail = true;
- }
- }
+ patch_discard_jumps_to_landing_pad(inst);
break;
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index f6c209b77e4..3456d974b6b 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3352,17 +3352,14 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
}
case nir_intrinsic_demote:
- case nir_intrinsic_discard:
case nir_intrinsic_demote_if:
+ abort();
+
+ case nir_intrinsic_discard:
case nir_intrinsic_discard_if: {
- /* We track our discarded pixels in f0.1. By predicating on it, we can
- * update just the flag bits that aren't yet discarded. If there's no
- * condition, we emit a CMP of g0 != g0, so all currently executing
- * channels will get turned off.
- */
fs_inst *cmp = NULL;
- if (instr->intrinsic == nir_intrinsic_demote_if ||
- instr->intrinsic == nir_intrinsic_discard_if) {
+
+ if (instr->intrinsic == nir_intrinsic_discard_if) {
nir_alu_instr *alu = nir_src_as_alu_instr(instr->src[0]);
if (alu != NULL &&
@@ -3387,40 +3384,21 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
cmp = (fs_inst *) instructions.get_tail();
if (cmp->conditional_mod == BRW_CONDITIONAL_NONE) {
if (cmp->can_do_cmod())
- cmp->conditional_mod = BRW_CONDITIONAL_Z;
+ cmp->conditional_mod = BRW_CONDITIONAL_NZ;
else
cmp = NULL;
- } else {
- /* The old sequence that would have been generated is,
- * basically, bool_result == false. This is equivalent to
- * !bool_result, so negate the old modifier.
- */
- cmp->conditional_mod = brw_negate_cmod(cmp->conditional_mod);
}
}
if (cmp == NULL) {
cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]),
- brw_imm_d(0), BRW_CONDITIONAL_Z);
+ brw_imm_d(0), BRW_CONDITIONAL_NZ);
}
- } else {
- fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
- BRW_REGISTER_TYPE_UW));
- cmp = bld.CMP(bld.null_reg_f(), some_reg, some_reg, BRW_CONDITIONAL_NZ);
- }
-
- cmp->predicate = BRW_PREDICATE_NORMAL;
- cmp->flag_subreg = 1;
-
- if (devinfo->gen >= 6) {
- /* Due to the way we implement discard, the jump will only happen
- * when the whole quad is discarded. So we can do this even for
- * demote as it won't break its uniformity promises.
- */
- emit_discard_jump();
}
- limit_dispatch_width(16, "Fragment discard/demote not implemented in SIMD32 mode.");
+ fs_inst *jump = bld.emit(FS_OPCODE_DISCARD_JUMP);
+ if (instr->intrinsic == nir_intrinsic_discard_if)
+ set_predicate(BRW_PREDICATE_NORMAL, jump);
break;
}
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 76d15acbca8..6a9794ca8bc 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -359,7 +359,6 @@ fs_visitor::emit_alpha_test()
cond_for_alpha_func(key->alpha_test_func));
}
cmp->predicate = BRW_PREDICATE_NORMAL;
- cmp->flag_subreg = 1;
}
fs_inst *
@@ -393,10 +392,8 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
sources, ARRAY_SIZE(sources));
- if (prog_data->uses_kill) {
+ if (prog_data->uses_kill)
write->predicate = BRW_PREDICATE_NORMAL;
- write->flag_subreg = 1;
- }
return write;
}
diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h
index 4543d841c66..94d053e5ce4 100644
--- a/src/intel/compiler/brw_reg.h
+++ b/src/intel/compiler/brw_reg.h
@@ -939,6 +939,21 @@ brw_dmask_reg()
}
static inline struct brw_reg
+brw_mask_stack_reg(unsigned subnr)
+{
+ return suboffset(retype(brw_vec16_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_MASK_STACK, 0),
+ BRW_REGISTER_TYPE_UB), subnr);
+}
+
+static inline struct brw_reg
+brw_mask_stack_depth_reg(unsigned subnr)
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_MASK_STACK_DEPTH, subnr);
+}
+
+static inline struct brw_reg
brw_message_reg(unsigned nr)
{
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index 4fed1492ff7..b07f941e004 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -1019,7 +1019,7 @@ instruction_scheduler::add_dep(schedule_node *before, schedule_node *after)
static bool
is_scheduling_barrier(const backend_instruction *inst)
{
- return inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
+ return inst->opcode == FS_OPCODE_DISCARD_LANDING_PAD ||
inst->is_control_flow() ||
inst->has_side_effects();
}
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index e6f6f827c44..3ade32ac865 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -427,8 +427,8 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
return "pack_half_2x16_split";
- case FS_OPCODE_PLACEHOLDER_HALT:
- return "placeholder_halt";
+ case FS_OPCODE_DISCARD_LANDING_PAD:
+ return "discard_landing_pad";
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
return "interp_sample";