summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2016-03-15 18:00:22 -0700
committer: Eric Anholt <eric@anholt.net> 2016-03-16 11:28:47 -0700
commit: 2b9f0dffe00bdc556436da02c099b8a50ecc4f49 (patch)
tree: 8fbf52ef7079ea8d08cfb6822012faea156baae2
parent: 7c9fc439150188612c7fe595cbe0180fcea3e705 (diff)
vc4: Move discard handling to the condition flag.
Now that the field exists in the instruction, we can make discards less special. As a bonus, that means that we should be able to merge some more .sf instructions together when we get around to that.

This causes some scheduling changes, as it allows tlb_color_reads to be delayed past the discard condition setup. Since the tlb_color_read ends up later, this may mean performance improvements, but I haven't tested.

total instructions in shared programs: 78114 -> 78035 (-0.10%)
instructions in affected programs: 1922 -> 1843 (-4.11%)
total estimated cycles in shared programs: 234318 -> 234329 (0.00%)
estimated cycles in affected programs: 8200 -> 8211 (0.13%)
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 19
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 16
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir_schedule.c 5
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 22
5 files changed, 29 insertions, 34 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 81e8e9150d..f5826d8517 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1184,8 +1184,11 @@ emit_frag_end(struct vc4_compile *c)
color = qir_uniform_ui(c, 0);
}
- if (c->discard.file != QFILE_NULL)
- qir_TLB_DISCARD_SETUP(c, c->discard);
+ uint32_t discard_cond = QPU_COND_ALWAYS;
+ if (c->discard.file != QFILE_NULL) {
+ qir_SF(c, c->discard);
+ discard_cond = QPU_COND_ZS;
+ }
if (c->fs_key->stencil_enabled) {
qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
@@ -1209,14 +1212,18 @@ emit_frag_end(struct vc4_compile *c)
} else {
z = qir_FRAG_Z(c);
}
- qir_TLB_Z_WRITE(c, z);
+ struct qinst *inst = qir_TLB_Z_WRITE(c, z);
+ inst->cond = discard_cond;
}
if (!c->msaa_per_sample_output) {
- qir_TLB_COLOR_WRITE(c, color);
+ struct qinst *inst = qir_TLB_COLOR_WRITE(c, color);
+ inst->cond = discard_cond;
} else {
- for (int i = 0; i < VC4_MAX_SAMPLES; i++)
- qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+ for (int i = 0; i < VC4_MAX_SAMPLES; i++) {
+ struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+ inst->cond = discard_cond;
+ }
}
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 125a9525e1..e73e389941 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -69,7 +69,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_RSQ] = { "rsq", 1, 1, false, true },
[QOP_EXP2] = { "exp2", 1, 2, false, true },
[QOP_LOG2] = { "log2", 1, 2, false, true },
- [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
[QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
[QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 4f39d72f55..3fbf5d749e 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -101,7 +101,6 @@ enum qop {
QOP_LOG2,
QOP_VW_SETUP,
QOP_VR_SETUP,
- QOP_TLB_DISCARD_SETUP,
QOP_TLB_STENCIL_SETUP,
QOP_TLB_Z_WRITE,
QOP_TLB_COLOR_WRITE,
@@ -551,17 +550,23 @@ qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \
}
#define QIR_NODST_1(name) \
-static inline void \
+static inline struct qinst * \
qir_##name(struct vc4_compile *c, struct qreg a) \
{ \
- qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \
+ struct qinst *inst = qir_inst(QOP_##name, c->undef, \
+ a, c->undef); \
+ qir_emit(c, inst); \
+ return inst; \
}
#define QIR_NODST_2(name) \
-static inline void \
+static inline struct qinst * \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \
{ \
- qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \
+ struct qinst *inst = qir_inst(QOP_##name, c->undef, \
+ a, b); \
+ qir_emit(c, inst); \
+ return inst; \
}
#define QIR_PACK(name) \
@@ -623,7 +628,6 @@ QIR_ALU0(TLB_COLOR_READ)
QIR_NODST_1(TLB_COLOR_WRITE)
QIR_NODST_1(TLB_COLOR_WRITE_MS)
QIR_NODST_1(TLB_Z_WRITE)
-QIR_NODST_1(TLB_DISCARD_SETUP)
QIR_NODST_1(TLB_STENCIL_SETUP)
QIR_NODST_1(MS_MASK)
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index ee1e9aafbb..186e81be75 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -236,11 +236,6 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
add_write_dep(dir, &state->last_tlb, n);
break;
- case QOP_TLB_DISCARD_SETUP:
- add_write_dep(dir, &state->last_sf, n);
- add_write_dep(dir, &state->last_tlb, n);
- break;
-
default:
break;
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 450b97fc01..b507e37068 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -171,7 +171,6 @@ void
vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
{
struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c);
- bool discard = false;
uint32_t inputs_remaining = c->num_inputs;
uint32_t vpm_read_fifo_count = 0;
uint32_t vpm_read_offset = 0;
@@ -375,12 +374,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
*/
break;
- case QOP_TLB_DISCARD_SETUP:
- discard = true;
- queue(c, qpu_a_MOV(src[0], src[0]) | unpack);
- *last_inst(c) |= QPU_SF;
- break;
-
case QOP_TLB_STENCIL_SETUP:
assert(!unpack);
queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
@@ -390,9 +383,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_TLB_Z_WRITE:
queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
src[0]) | unpack);
- if (discard) {
- set_last_cond_add(c, QPU_COND_ZS);
- }
+ set_last_cond_add(c, qinst->cond);
+ handled_qinst_cond = true;
break;
case QOP_TLB_COLOR_READ:
@@ -406,16 +398,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_TLB_COLOR_WRITE:
queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
- if (discard) {
- set_last_cond_add(c, QPU_COND_ZS);
- }
+ set_last_cond_add(c, qinst->cond);
+ handled_qinst_cond = true;
break;
case QOP_TLB_COLOR_WRITE_MS:
queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0]));
- if (discard) {
- set_last_cond_add(c, QPU_COND_ZS);
- }
+ set_last_cond_add(c, qinst->cond);
+ handled_qinst_cond = true;
break;
case QOP_VARY_ADD_C: