summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2010-09-29 12:08:11 -0700
committerEric Anholt <eric@anholt.net>2010-09-29 16:40:31 -0700
commite1261d3c493ff48348483a0084f3017c7e663dc0 (patch)
tree4314191e191bd940fe971929ff71e498415149f6
parent9ff90b7230cceb362c7e0fe1c3d5029b8cbfe6bd (diff)
i965: First cut at register allocation using graph coloring.
The interference is totally bogus (maximal), so this is equivalent to our trivial register assignment before. As in, passes the same set of piglit tests.
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp158
1 files changed, 151 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e91351cb6c..4de64f0927 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -35,6 +35,7 @@ extern "C" {
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
+#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
@@ -448,6 +449,7 @@ public:
void assign_curb_setup();
void assign_urb_setup();
void assign_regs();
+ void assign_regs_trivial();
void generate_code();
void generate_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
@@ -1997,7 +1999,7 @@ fs_visitor::assign_urb_setup()
}
static void
-trivial_assign_reg(int *reg_hw_locations, fs_reg *reg)
+assign_reg(int *reg_hw_locations, fs_reg *reg)
{
if (reg->file == GRF && reg->reg != 0) {
reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
@@ -2006,7 +2008,7 @@ trivial_assign_reg(int *reg_hw_locations, fs_reg *reg)
}
void
-fs_visitor::assign_regs()
+fs_visitor::assign_regs_trivial()
{
int last_grf = 0;
int hw_reg_mapping[this->virtual_grf_next];
@@ -2020,18 +2022,157 @@ fs_visitor::assign_regs()
}
last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
- /* FINISHME: trivial assignment of register numbers */
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
- trivial_assign_reg(hw_reg_mapping, &inst->dst);
- trivial_assign_reg(hw_reg_mapping, &inst->src[0]);
- trivial_assign_reg(hw_reg_mapping, &inst->src[1]);
+ assign_reg(hw_reg_mapping, &inst->dst);
+ assign_reg(hw_reg_mapping, &inst->src[0]);
+ assign_reg(hw_reg_mapping, &inst->src[1]);
}
this->grf_used = last_grf + 1;
}
+void
+fs_visitor::assign_regs()
+{
+ int last_grf = 0;
+ int hw_reg_mapping[this->virtual_grf_next + 1];
+ int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
+ int class_sizes[base_reg_count];
+ int class_count = 0;
+
+ /* Set up the register classes.
+ *
+ * The base registers store a scalar value. For texture samples,
+ * we get virtual GRFs composed of 4 contiguous hw register. For
+ * structures and arrays, we store them as contiguous larger things
+ * than that, though we should be able to do better most of the
+ * time.
+ */
+ class_sizes[class_count++] = 1;
+ for (int r = 1; r < this->virtual_grf_next; r++) {
+ int i;
+
+ for (i = 0; i < class_count; i++) {
+ if (class_sizes[i] == this->virtual_grf_sizes[r])
+ break;
+ }
+ if (i == class_count) {
+ class_sizes[class_count++] = this->virtual_grf_sizes[r];
+ }
+ }
+
+ int ra_reg_count = 0;
+ int class_base_reg[class_count];
+ int class_reg_count[class_count];
+ int classes[class_count];
+
+ for (int i = 0; i < class_count; i++) {
+ class_base_reg[i] = ra_reg_count;
+ class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
+ ra_reg_count += class_reg_count[i];
+ }
+
+ struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
+ for (int i = 0; i < class_count; i++) {
+ classes[i] = ra_alloc_reg_class(regs);
+
+ for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
+ ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
+ }
+
+ /* Add conflicts between our contiguous registers aliasing
+ * base regs and other register classes' contiguous registers
+ * that alias base regs, or the base regs themselves for classes[0].
+ */
+ for (int c = 0; c <= i; c++) {
+ for (int i_r = 0; i_r < class_reg_count[i] - 1; i_r++) {
+ for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
+ c_r <= MIN2(class_reg_count[c] - 1, i_r + class_sizes[i] - 1);
+ c_r++) {
+
+ if (0) {
+ printf("%d/%d conflicts %d/%d\n",
+ class_sizes[i], i_r,
+ class_sizes[c], c_r);
+ }
+
+ ra_add_reg_conflict(regs,
+ class_base_reg[i] + i_r,
+ class_base_reg[c] + c_r);
+ }
+ }
+ }
+ }
+
+ ra_set_finalize(regs);
+
+ struct ra_graph *g = ra_alloc_interference_graph(regs,
+ this->virtual_grf_next);
+ /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
+ * with nodes.
+ */
+ ra_set_node_class(g, 0, classes[0]);
+
+ /* FINISHME: Proper interference (live interval analysis) */
+ for (int i = 1; i < this->virtual_grf_next; i++) {
+ for (int c = 0; c < class_count; c++) {
+ if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+ ra_set_node_class(g, i, classes[c]);
+ break;
+ }
+ }
+
+ for (int j = 1; j < i; j++) {
+ ra_add_node_interference(g, i, j);
+ }
+ }
+
+ /* FINISHME: Handle spilling */
+ if (!ra_allocate_no_spills(g)) {
+ fprintf(stderr, "Failed to allocate registers.\n");
+ this->fail = true;
+ return;
+ }
+
+ /* Get the chosen virtual registers for each node, and map virtual
+ * regs in the register classes back down to real hardware reg
+ * numbers.
+ */
+ hw_reg_mapping[0] = 0; /* unused */
+ for (int i = 1; i < this->virtual_grf_next; i++) {
+ int reg = ra_get_node_reg(g, i);
+ int hw_reg = -1;
+
+ for (int c = 0; c < class_count; c++) {
+ if (reg >= class_base_reg[c] &&
+ reg < class_base_reg[c] + class_reg_count[c] - 1) {
+ hw_reg = reg - class_base_reg[c];
+ break;
+ }
+ }
+
+ assert(hw_reg != -1);
+ hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
+ last_grf = MAX2(last_grf,
+ hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
+ }
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+
+ assign_reg(hw_reg_mapping, &inst->dst);
+ assign_reg(hw_reg_mapping, &inst->src[0]);
+ assign_reg(hw_reg_mapping, &inst->src[1]);
+ }
+
+ this->grf_used = last_grf + 1;
+
+ talloc_free(g);
+ talloc_free(regs);
+}
+
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
struct brw_reg brw_reg;
@@ -2319,7 +2460,10 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
v.emit_fb_writes();
v.assign_curb_setup();
v.assign_urb_setup();
- v.assign_regs();
+ if (0)
+ v.assign_regs_trivial();
+ else
+ v.assign_regs();
}
v.generate_code();