summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Turner <mattst88@gmail.com> 2014-04-12 17:40:18 -0700
committer Matt Turner <mattst88@gmail.com> 2014-04-15 09:25:11 -0700
commit f34f39330bb41fb0a86930908de10353193a841d (patch)
tree 60124c3f1f1955c558b7cfcdc394d072c1ca960b
parent 596737ee91cc199a8edff5dc440736471e28f297 (diff)
i965/fs: Reimplement dead_code_elimination().
total instructions in shared programs: 1653399 -> 1651790 (-0.10%)
instructions in affected programs: 92157 -> 90548 (-1.75%)
GAINED: 2
LOST: 2

Also significantly reduces the number of optimization loop iterations:

total loop iterations in shared programs: 39724 -> 31651 (-20.32%)
loop iterations in affected programs: 21617 -> 13544 (-37.35%)

Including some great pathological cases, like 29 -> 3 in Strike Suit Zero and 24 -> 3 in Dota2.

Reviewed-by: Eric Anholt <eric@anholt.net>
-rw-r--r-- src/mesa/drivers/dri/i965/Makefile.sources | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 57
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 115
3 files changed, 117 insertions, 56 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 8205fe9baa1..836c62b7e75 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -58,6 +58,7 @@ i965_FILES = \
brw_fs_channel_expressions.cpp \
brw_fs_copy_propagation.cpp \
brw_fs_cse.cpp \
+ brw_fs_dead_code_eliminate.cpp \
brw_fs_fp.cpp \
brw_fs_generator.cpp \
brw_fs_live_variables.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 85a5463e020..c723bf0ead4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2085,61 +2085,6 @@ fs_visitor::opt_algebraic()
return progress;
}
-/**
- * Removes any instructions writing a VGRF where that VGRF is not used by any
- * later instruction.
- *
- * Only instructions whose destination is in the GRF file and that report no
- * side effects are considered.  Such an instruction is treated as dead when,
- * for every register it writes, the live interval of that register ends at
- * the instruction's own position in the list.
- *
- * Returns true if at least one instruction was removed; in that case the
- * live intervals are invalidated before returning.
- */
-bool
-fs_visitor::dead_code_eliminate()
-{
- bool progress = false;
- int pc = 0; /* position of the current instruction; bumped once per node */
-
- calculate_live_intervals();
-
- foreach_list_safe(node, &this->instructions) {
- fs_inst *inst = (fs_inst *)node;
-
- if (inst->dst.file == GRF && !inst->has_side_effects()) {
- bool dead = true;
-
- for (int i = 0; i < inst->regs_written; i++) {
- int var = live_intervals->var_from_vgrf[inst->dst.reg];
- assert(live_intervals->end[var + inst->dst.reg_offset + i] >= pc);
- if (live_intervals->end[var + inst->dst.reg_offset + i] != pc) {
- dead = false; /* interval extends past this instruction */
- break;
- }
- }
-
- if (dead) {
- /* Don't dead code eliminate instructions that write to the
- * accumulator as a side-effect. Instead just set the destination
- * to the null register to free it.
- *
- * Note that this rewrite does not set progress; only an actual
- * removal does.
- */
- switch (inst->opcode) {
- case BRW_OPCODE_ADDC:
- case BRW_OPCODE_SUBB:
- case BRW_OPCODE_MACH:
- inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
- break;
- default:
- inst->remove();
- progress = true;
- break;
- }
- }
- }
-
- pc++;
- }
-
- if (progress)
- invalidate_live_intervals();
-
- return progress;
-}
-
struct dead_code_hash_key
{
int vgrf;
@@ -3249,8 +3194,8 @@ fs_visitor::run()
progress = opt_cse() || progress;
progress = opt_copy_propagate() || progress;
progress = opt_peephole_predicated_break() || progress;
- progress = dead_code_eliminate() || progress;
progress = dead_code_eliminate_local() || progress;
+ progress = dead_code_eliminate() || progress;
progress = opt_peephole_sel() || progress;
progress = dead_control_flow_eliminate(this) || progress;
progress = opt_saturate_propagation() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
new file mode 100644
index 00000000000..390ac9ada5b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_fs.h"
+#include "brw_fs_live_variables.h"
+#include "brw_cfg.h"
+
+/** @file brw_fs_dead_code_eliminate.cpp
+ *
+ * Dataflow-aware dead code elimination.
+ *
+ * Walks the instruction list from the bottom, removing instructions that
+ * have results that both aren't used in later blocks and haven't been read
+ * yet in the tail end of this block.
+ */
+
+/**
+ * Removes instructions whose GRF results are never read afterwards.
+ *
+ * For each basic block the pass starts from the block's live-out set and
+ * walks the instructions from the block's end towards its start, keeping a
+ * running bitset of live variables.  An instruction is a candidate when its
+ * destination is a GRF, it has no side effects and it does not write the
+ * flag register.  A candidate is dead when none of the registers it writes
+ * is set in the bitset at that point.
+ *
+ * Dead instructions are only marked (opcode set to BRW_OPCODE_NOP) during
+ * the walk and are unlinked in a second pass, presumably so that the
+ * block->start / block->end pointers held by the cfg stay valid while the
+ * blocks are being traversed -- TODO confirm.
+ *
+ * NOTE(review): the second pass removes every BRW_OPCODE_NOP in the
+ * instruction list, including any that were present before this pass ran.
+ *
+ * @return true if any instruction was marked dead or had its destination
+ *         replaced by the null register; live intervals are invalidated in
+ *         that case.
+ */
+bool
+fs_visitor::dead_code_eliminate()
+{
+   bool progress = false;
+
+   cfg_t cfg(&instructions);
+
+   calculate_live_intervals();
+
+   int num_vars = live_intervals->num_vars;
+   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
+
+   for (int b = 0; b < cfg.num_blocks; b++) {
+      bblock_t *block = cfg.blocks[b];
+
+      /* Seed the running liveness set with what is live out of this block. */
+      memcpy(live, live_intervals->bd[b].liveout,
+             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
+
+      /* Walk the block bottom-up so every read is seen before the write
+       * that produces the value.
+       */
+      for (fs_inst *inst = (fs_inst *)block->end;
+           inst != block->start->prev;
+           inst = (fs_inst *)inst->prev) {
+         if (inst->dst.file == GRF &&
+             !inst->has_side_effects() &&
+             !inst->writes_flag()) {
+            bool result_live = false;
+
+            if (inst->regs_written == 1) {
+               int var = live_intervals->var_from_reg(&inst->dst);
+               result_live = BITSET_TEST(live, var);
+            } else {
+               /* Start at the first register actually written: the base
+                * variable of the VGRF plus the destination's reg_offset.
+                * Ignoring the offset would test the liveness of the wrong
+                * registers for writes that do not begin at the start of the
+                * VGRF.  This matches how sources are marked live below.
+                */
+               int var = live_intervals->var_from_vgrf[inst->dst.reg] +
+                         inst->dst.reg_offset;
+               for (int i = 0; i < inst->regs_written; i++) {
+                  result_live = result_live || BITSET_TEST(live, var + i);
+               }
+            }
+
+            if (!result_live) {
+               progress = true;
+
+               switch (inst->opcode) {
+               case BRW_OPCODE_ADDC:
+               case BRW_OPCODE_SUBB:
+               case BRW_OPCODE_MACH:
+                  /* These are kept and only get a null destination; the
+                   * implementation this replaces did the same because they
+                   * write the accumulator as a side effect.  Their sources
+                   * are still marked live below.
+                   */
+                  inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
+                  break;
+               default:
+                  /* Mark for removal and move on to the previous
+                   * instruction without marking this one's sources live.
+                   */
+                  inst->opcode = BRW_OPCODE_NOP;
+                  continue;
+               }
+            }
+         }
+
+         /* The instruction stays: everything it reads is live above it. */
+         for (int i = 0; i < 3; i++) {
+            if (inst->src[i].file == GRF) {
+               int var = live_intervals->var_from_vgrf[inst->src[i].reg];
+
+               for (int j = 0; j < inst->regs_read(this, i); j++) {
+                  BITSET_SET(live, var + inst->src[i].reg_offset + j);
+               }
+            }
+         }
+      }
+   }
+
+   ralloc_free(live);
+
+   if (progress) {
+      /* Second pass: unlink everything that was marked as a NOP. */
+      foreach_list_safe(node, &this->instructions) {
+         fs_inst *inst = (fs_inst *)node;
+
+         if (inst->opcode == BRW_OPCODE_NOP) {
+            inst->remove();
+         }
+      }
+
+      invalidate_live_intervals();
+   }
+
+   return progress;
+}