summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2012-06-04 16:00:32 -0700
committer: Eric Anholt <eric@anholt.net> 2013-10-10 15:54:14 -0700
commit: 45ffaeccaf412f322605a7c7488c6ab0d85fc4b6 (patch)
tree: 4ebc5f44c833358e06498de068a05227fd6b0267
parent: 5af8388110595f6324d697f0b468047c779f1079 (diff)
i965/fs: Do live variables dataflow analysis on a per-channel level.
This significantly improves our handling of VGRFs of size > 1.

Previously, we only marked VGRFs as def'd if the whole register was
written by a single instruction. Large VGRFs which were written
piecemeal would not be considered def'd at all, even if they were
ultimately completely written.

Without being def'd, these were then marked "live in" to the basic
block, often extending the range to preceding blocks and sometimes even
the start of the program.

The new per-component tracking gives more accurate live intervals, which
makes register coalescing more effective.

In the future, this should help with texturing from GRFs on Gen7+.
A sampler message might be represented by a 2-register VGRF which holds
the texture coordinates. If those are incoming varyings, they'll be
produced by two PLN instructions, which are piecemeal writes.

No reduction in shader-db instruction counts. However, code which
prints the live interval ranges does show that some VGRFs now have
smaller (and more correct) live intervals.

v2: Rebase on current send-from-GRF code requiring adding extra use[]s.
v3: Rebase on live intervals fix to include defs in the end of the
    interval.
v4 (Kenneth Graunke): Rebase; split off a few preparatory patches; add
    lots of comments; minor style changes; rewrite commit message.
v5 (Eric Anholt): whitespace nit.

Written-by: Eric Anholt <eric@anholt.net> [v1-3]
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> [v4]
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net> (v4)
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp | 78
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_live_variables.h | 12
2 files changed, 73 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 8abed8fdfb..4e9825f1e7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -32,8 +32,19 @@ using namespace brw;
/** @file brw_fs_live_variables.cpp
*
- * Support for computing at the basic block level which variables
- * (virtual GRFs in our case) are live at entry and exit.
+ * Support for calculating liveness information about virtual GRFs.
+ *
+ * This produces a live interval for each whole virtual GRF. We could
+ * choose to expose per-component live intervals for VGRFs of size > 1,
+ * but we currently do not. It is easier for the consumers of this
+ * information to work with whole VGRFs.
+ *
+ * However, we internally track use/def information at the per-component
+ * (reg_offset) level for greater accuracy. Large VGRFs may be accessed
+ * piecemeal over many (possibly non-adjacent) instructions. In this case,
+ * examining a single instruction is insufficient to decide whether a whole
+ * VGRF is ultimately used or defined. Tracking individual components
+ * allows us to easily assemble this information.
*
* See Muchnick's Advanced Compiler Design and Implementation, section
* 14.1 (p444).
@@ -45,6 +56,8 @@ using namespace brw;
* The basic-block-level live variable analysis needs to know which
* variables get used before they're completely defined, and which
* variables are completely defined before they're used.
+ *
+ * These are tracked at the per-component level, rather than whole VGRFs.
*/
void
fs_live_variables::setup_def_use()
@@ -67,22 +80,32 @@ fs_live_variables::setup_def_use()
if (inst->src[i].file != GRF)
continue;
- int reg = inst->src[i].reg;
+ int regs_read = 1;
+ /* We don't know how many components are read in a send-from-grf,
+ * so just assume "all of them."
+ */
+ if (inst->is_send_from_grf())
+ regs_read = v->virtual_grf_sizes[inst->src[i].reg];
+
+ for (int j = 0; j < regs_read; j++) {
+ int var = var_from_vgrf[inst->src[i].reg] +
+ inst->src[i].reg_offset + j;
- if (!BITSET_TEST(bd[b].def, reg))
- BITSET_SET(bd[b].use, reg);
+ if (!BITSET_TEST(bd[b].def, var))
+ BITSET_SET(bd[b].use, var);
+ }
}
/* Check for unconditional writes to whole registers. These
* are the things that screen off preceding definitions of a
* variable, and thus qualify for being in def[].
*/
- if (inst->dst.file == GRF &&
- inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
- !inst->is_partial_write()) {
- int reg = inst->dst.reg;
- if (!BITSET_TEST(bd[b].use, reg))
- BITSET_SET(bd[b].def, reg);
+ if (inst->dst.file == GRF && !inst->is_partial_write()) {
+ int var = var_from_vgrf[inst->dst.reg] + inst->dst.reg_offset;
+ for (int j = 0; j < inst->regs_written; j++) {
+ if (!BITSET_TEST(bd[b].use, var + j))
+ BITSET_SET(bd[b].def, var + j);
+ }
}
ip++;
@@ -139,9 +162,23 @@ fs_live_variables::fs_live_variables(fs_visitor *v, cfg_t *cfg)
mem_ctx = ralloc_context(cfg->mem_ctx);
num_vgrfs = v->virtual_grf_count;
+ num_vars = 0;
+ var_from_vgrf = rzalloc_array(mem_ctx, int, num_vgrfs);
+ for (int i = 0; i < num_vgrfs; i++) {
+ var_from_vgrf[i] = num_vars;
+ num_vars += v->virtual_grf_sizes[i];
+ }
+
+ vgrf_from_var = rzalloc_array(mem_ctx, int, num_vars);
+ for (int i = 0; i < num_vgrfs; i++) {
+ for (int j = 0; j < v->virtual_grf_sizes[i]; j++) {
+ vgrf_from_var[var_from_vgrf[i] + j] = i;
+ }
+ }
+
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
- bitset_words = BITSET_WORDS(v->virtual_grf_count);
+ bitset_words = BITSET_WORDS(num_vars);
for (int i = 0; i < cfg->num_blocks; i++) {
bd[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
bd[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
@@ -160,6 +197,12 @@ fs_live_variables::~fs_live_variables()
#define MAX_INSTRUCTION (1 << 30)
+/**
+ * Compute the live intervals for each virtual GRF.
+ *
+ * This uses the per-component use/def data, but combines it to produce
+ * information about whole VGRFs.
+ */
void
fs_visitor::calculate_live_intervals()
{
@@ -242,15 +285,16 @@ fs_visitor::calculate_live_intervals()
fs_live_variables livevars(this, &cfg);
for (int b = 0; b < cfg.num_blocks; b++) {
- for (int i = 0; i < num_vgrfs; i++) {
+ for (int i = 0; i < livevars.num_vars; i++) {
+ int vgrf = livevars.vgrf_from_var[i];
if (BITSET_TEST(livevars.bd[b].livein, i)) {
- start[i] = MIN2(start[i], cfg.blocks[b]->start_ip);
- end[i] = MAX2(end[i], cfg.blocks[b]->start_ip);
+ start[vgrf] = MIN2(start[vgrf], cfg.blocks[b]->start_ip);
+ end[vgrf] = MAX2(end[vgrf], cfg.blocks[b]->start_ip);
}
if (BITSET_TEST(livevars.bd[b].liveout, i)) {
- start[i] = MIN2(start[i], cfg.blocks[b]->end_ip);
- end[i] = MAX2(end[i], cfg.blocks[b]->end_ip);
+ start[vgrf] = MIN2(start[vgrf], cfg.blocks[b]->end_ip);
+ end[vgrf] = MAX2(end[vgrf], cfg.blocks[b]->end_ip);
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
index c518755c50..ab612cce16 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
@@ -65,6 +65,18 @@ public:
cfg_t *cfg;
void *mem_ctx;
+ /** Map from virtual GRF number to index in block_data arrays. */
+ int *var_from_vgrf;
+
+ /**
+ * Map from any index in block_data to the virtual GRF containing it.
+ *
+ * For virtual_grf_sizes of [1, 2, 3], vgrf_from_var would contain
+ * [0, 1, 1, 2, 2, 2].
+ */
+ int *vgrf_from_var;
+
+ int num_vars;
int num_vgrfs;
int bitset_words;