diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 158 |
1 files changed, 151 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e91351cb6c..4de64f0927 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -35,6 +35,7 @@ extern "C" { #include "program/prog_parameter.h" #include "program/prog_print.h" #include "program/prog_optimize.h" +#include "program/register_allocate.h" #include "program/sampler.h" #include "program/hash_table.h" #include "brw_context.h" @@ -448,6 +449,7 @@ public: void assign_curb_setup(); void assign_urb_setup(); void assign_regs(); + void assign_regs_trivial(); void generate_code(); void generate_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, @@ -1997,7 +1999,7 @@ fs_visitor::assign_urb_setup() } static void -trivial_assign_reg(int *reg_hw_locations, fs_reg *reg) +assign_reg(int *reg_hw_locations, fs_reg *reg) { if (reg->file == GRF && reg->reg != 0) { reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset; @@ -2006,7 +2008,7 @@ trivial_assign_reg(int *reg_hw_locations, fs_reg *reg) } void -fs_visitor::assign_regs() +fs_visitor::assign_regs_trivial() { int last_grf = 0; int hw_reg_mapping[this->virtual_grf_next]; @@ -2020,18 +2022,157 @@ fs_visitor::assign_regs() } last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1]; - /* FINISHME: trivial assignment of register numbers */ foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); - trivial_assign_reg(hw_reg_mapping, &inst->dst); - trivial_assign_reg(hw_reg_mapping, &inst->src[0]); - trivial_assign_reg(hw_reg_mapping, &inst->src[1]); + assign_reg(hw_reg_mapping, &inst->dst); + assign_reg(hw_reg_mapping, &inst->src[0]); + assign_reg(hw_reg_mapping, &inst->src[1]); } this->grf_used = last_grf + 1; } +void +fs_visitor::assign_regs() +{ + int last_grf = 0; + int hw_reg_mapping[this->virtual_grf_next + 1]; + int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf; + int class_sizes[base_reg_count]; + int class_count = 0; + + /* Set up the register classes. + * + * The base registers store a scalar value. For texture samples, + * we get virtual GRFs composed of 4 contiguous hw register. For + * structures and arrays, we store them as contiguous larger things + * than that, though we should be able to do better most of the + * time. + */ + class_sizes[class_count++] = 1; + for (int r = 1; r < this->virtual_grf_next; r++) { + int i; + + for (i = 0; i < class_count; i++) { + if (class_sizes[i] == this->virtual_grf_sizes[r]) + break; + } + if (i == class_count) { + class_sizes[class_count++] = this->virtual_grf_sizes[r]; + } + } + + int ra_reg_count = 0; + int class_base_reg[class_count]; + int class_reg_count[class_count]; + int classes[class_count]; + + for (int i = 0; i < class_count; i++) { + class_base_reg[i] = ra_reg_count; + class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); + ra_reg_count += class_reg_count[i]; + } + + struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); + for (int i = 0; i < class_count; i++) { + classes[i] = ra_alloc_reg_class(regs); + + for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { + ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); + } + + /* Add conflicts between our contiguous registers aliasing + * base regs and other register classes' contiguous registers + * that alias base regs, or the base regs themselves for classes[0]. + */ + for (int c = 0; c <= i; c++) { + for (int i_r = 0; i_r < class_reg_count[i] - 1; i_r++) { + for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); + c_r <= MIN2(class_reg_count[c] - 1, i_r + class_sizes[i] - 1); + c_r++) { + + if (0) { + printf("%d/%d conflicts %d/%d\n", + class_sizes[i], i_r, + class_sizes[c], c_r); + } + + ra_add_reg_conflict(regs, + class_base_reg[i] + i_r, + class_base_reg[c] + c_r); + } + } + } + } + + ra_set_finalize(regs); + + struct ra_graph *g = ra_alloc_interference_graph(regs, + this->virtual_grf_next); + /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 + * with nodes. + */ + ra_set_node_class(g, 0, classes[0]); + + /* FINISHME: Proper interference (live interval analysis) */ + for (int i = 1; i < this->virtual_grf_next; i++) { + for (int c = 0; c < class_count; c++) { + if (class_sizes[c] == this->virtual_grf_sizes[i]) { + ra_set_node_class(g, i, classes[c]); + break; + } + } + + for (int j = 1; j < i; j++) { + ra_add_node_interference(g, i, j); + } + } + + /* FINISHME: Handle spilling */ + if (!ra_allocate_no_spills(g)) { + fprintf(stderr, "Failed to allocate registers.\n"); + this->fail = true; + return; + } + + /* Get the chosen virtual registers for each node, and map virtual + * regs in the register classes back down to real hardware reg + * numbers. + */ + hw_reg_mapping[0] = 0; /* unused */ + for (int i = 1; i < this->virtual_grf_next; i++) { + int reg = ra_get_node_reg(g, i); + int hw_reg = -1; + + for (int c = 0; c < class_count; c++) { + if (reg >= class_base_reg[c] && + reg < class_base_reg[c] + class_reg_count[c] - 1) { + hw_reg = reg - class_base_reg[c]; + break; + } + } + + assert(hw_reg != -1); + hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg; + last_grf = MAX2(last_grf, + hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + + assign_reg(hw_reg_mapping, &inst->dst); + assign_reg(hw_reg_mapping, &inst->src[0]); + assign_reg(hw_reg_mapping, &inst->src[1]); + } + + this->grf_used = last_grf + 1; + + talloc_free(g); + talloc_free(regs); +} + static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) { struct brw_reg brw_reg; @@ -2319,7 +2460,10 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) v.emit_fb_writes(); v.assign_curb_setup(); v.assign_urb_setup(); - v.assign_regs(); + if (0) + v.assign_regs_trivial(); + else + v.assign_regs(); } v.generate_code(); |