summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel Schürmann <daniel@schuermann.dev> 2023-12-18 11:21:08 +0100
committer: Marge Bot <emma+marge@anholt.net> 2024-03-19 13:06:58 +0000
commit: 9bbb9f110482bb25d05ae1e07bf9bc25a30ef7a3 (patch)
tree: 15c0e8836831e67328257d2678afb628d9268cfd
parent: 3e58a736e4c3468f3d932fc48c8c8b3e539504f0 (diff)
aco: use small_vec as Block::edge_vec for predecessors and successors
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27984>
-rw-r--r-- src/amd/compiler/aco_insert_exec_mask.cpp 4
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 2
-rw-r--r-- src/amd/compiler/aco_ir.h 10
-rw-r--r-- src/amd/compiler/aco_live_var_analysis.cpp 4
-rw-r--r-- src/amd/compiler/aco_lower_to_cssa.cpp 8
-rw-r--r-- src/amd/compiler/aco_optimizer_postRA.cpp 5
-rw-r--r-- src/amd/compiler/aco_register_allocation.cpp 8
-rw-r--r-- src/amd/compiler/aco_spill.cpp 17
-rw-r--r-- src/amd/compiler/aco_ssa_elimination.cpp 2
9 files changed, 29 insertions, 31 deletions
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 5038d5da680..7ab20c443ef 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -174,7 +174,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
{
unsigned idx = block->index;
Builder bld(ctx.program, &instructions);
- std::vector<unsigned>& preds = block->linear_preds;
+ Block::edge_vec& preds = block->linear_preds;
bool restore_exec = false;
/* start block */
@@ -270,7 +270,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
assert(ctx.info[pred].exec.size() >= info.num_exec_masks);
/* fill the loop header phis */
- std::vector<unsigned>& header_preds = header->linear_preds;
+ Block::edge_vec& header_preds = header->linear_preds;
int instr_idx = 0;
if (info.has_discard && header_preds.size() > 1) {
while (instr_idx < info.num_exec_masks - 1) {
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 0ec8389aa05..9c52e67d41f 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10021,7 +10021,7 @@ visit_phi(isel_context* ctx, nir_phi_instr* instr)
nir_foreach_phi_src (src, instr)
phi_src[src->pred->index] = src->src.ssa;
- std::vector<unsigned>& preds = logical ? ctx->block->logical_preds : ctx->block->linear_preds;
+ Block::edge_vec& preds = logical ? ctx->block->logical_preds : ctx->block->linear_preds;
unsigned num_operands = 0;
Operand* const operands = (Operand*)alloca(
(std::max(exec_list_length(&instr->srcs), (unsigned)preds.size()) + 1) * sizeof(Operand));
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 8ee4caeed47..d6f83822278 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1930,14 +1930,16 @@ struct RegisterDemand {
/* CFG */
struct Block {
+ using edge_vec = small_vec<uint32_t, 2>;
+
float_mode fp_mode;
unsigned index;
unsigned offset = 0;
std::vector<aco_ptr<Instruction>> instructions;
- std::vector<unsigned> logical_preds;
- std::vector<unsigned> linear_preds;
- std::vector<unsigned> logical_succs;
- std::vector<unsigned> linear_succs;
+ edge_vec logical_preds;
+ edge_vec linear_preds;
+ edge_vec logical_succs;
+ edge_vec linear_succs;
RegisterDemand register_demand = RegisterDemand();
uint32_t kind = 0;
int32_t logical_idom = -1;
diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp
index f894dd31b74..35eeeb2eb8f 100644
--- a/src/amd/compiler/aco_live_var_analysis.cpp
+++ b/src/amd/compiler/aco_live_var_analysis.cpp
@@ -263,7 +263,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, unsign
} else {
for (unsigned t : live) {
RegClass rc = program->temp_rc[t];
- std::vector<unsigned>& preds = rc.is_linear() ? block->linear_preds : block->logical_preds;
+ Block::edge_vec& preds = rc.is_linear() ? block->linear_preds : block->logical_preds;
#ifndef NDEBUG
if (preds.empty())
@@ -285,7 +285,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block, unsign
Instruction* insn = block->instructions[phi_idx].get();
assert(is_phi(insn));
/* directly insert into the predecessors live-out set */
- std::vector<unsigned>& preds =
+ Block::edge_vec& preds =
insn->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
for (unsigned i = 0; i < preds.size(); ++i) {
Operand& operand = insn->operands[i];
diff --git a/src/amd/compiler/aco_lower_to_cssa.cpp b/src/amd/compiler/aco_lower_to_cssa.cpp
index 3c509ee2f81..7b9eb3be15a 100644
--- a/src/amd/compiler/aco_lower_to_cssa.cpp
+++ b/src/amd/compiler/aco_lower_to_cssa.cpp
@@ -89,7 +89,7 @@ collect_parallelcopies(cssa_ctx& ctx)
if (!def.isTemp())
continue;
- std::vector<unsigned>& preds =
+ Block::edge_vec& preds =
phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds;
uint32_t index = ctx.merge_sets.size();
merge_set set;
@@ -194,9 +194,9 @@ intersects(cssa_ctx& ctx, Temp var, Temp parent)
/* parent is defined in a different block than var */
if (node_parent.defined_at < node_var.defined_at) {
/* if the parent is not live-in, they don't interfere */
- std::vector<uint32_t>& preds = var.type() == RegType::vgpr
- ? ctx.program->blocks[block_idx].logical_preds
- : ctx.program->blocks[block_idx].linear_preds;
+ Block::edge_vec& preds = var.type() == RegType::vgpr
+ ? ctx.program->blocks[block_idx].logical_preds
+ : ctx.program->blocks[block_idx].linear_preds;
for (uint32_t pred : preds) {
if (!ctx.live_out[pred].count(parent.id()))
return false;
diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp
index 39bbff4a660..e67f56e056d 100644
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -74,9 +74,8 @@ struct pr_opt_ctx {
instr_idx_by_regs(std::unique_ptr<Idx_array[]>{new Idx_array[p->blocks.size()]})
{}
- ALWAYS_INLINE void reset_block_regs(const std::vector<uint32_t>& preds,
- const unsigned block_index, const unsigned min_reg,
- const unsigned num_regs)
+ ALWAYS_INLINE void reset_block_regs(const Block::edge_vec& preds, const unsigned block_index,
+ const unsigned min_reg, const unsigned num_regs)
{
const unsigned num_preds = preds.size();
const unsigned first_pred = preds[0];
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 12f8f005817..84ba242ff86 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -2235,7 +2235,7 @@ get_reg_phi(ra_ctx& ctx, IDSet& live_in, RegisterFile& register_file,
* to move it in this block's predecessors */
aco_opcode opcode =
pc.first.getTemp().is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
- std::vector<unsigned>& preds =
+ Block::edge_vec& preds =
pc.first.getTemp().is_linear() ? block.linear_preds : block.logical_preds;
aco_ptr<Instruction> new_phi{
create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)};
@@ -2354,7 +2354,7 @@ read_variable(ra_ctx& ctx, Temp val, unsigned block_idx)
Temp
handle_live_in(ra_ctx& ctx, Temp val, Block* block)
{
- std::vector<unsigned>& preds = val.is_linear() ? block->linear_preds : block->logical_preds;
+ Block::edge_vec& preds = val.is_linear() ? block->linear_preds : block->logical_preds;
if (preds.size() == 0)
return val;
@@ -2444,7 +2444,7 @@ handle_loop_phis(ra_ctx& ctx, const IDSet& live_in, uint32_t loop_header_idx,
aco_ptr<Instruction>& phi = loop_header.instructions[i];
if (!is_phi(phi))
break;
- const std::vector<unsigned>& preds =
+ const Block::edge_vec& preds =
phi->opcode == aco_opcode::p_phi ? loop_header.logical_preds : loop_header.linear_preds;
for (unsigned j = 1; j < phi->operands.size(); j++) {
Operand& op = phi->operands[j];
@@ -2535,7 +2535,7 @@ init_reg_file(ra_ctx& ctx, const std::vector<IDSet>& live_out_per_block, Block&
for (aco_ptr<Instruction>& instr : block.instructions) {
if (!is_phi(instr))
break;
- const std::vector<unsigned>& preds =
+ const Block::edge_vec& preds =
instr->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds;
for (unsigned i = 0; i < instr->operands.size(); i++) {
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index 2f9e4b0421f..56858da266e 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -259,7 +259,7 @@ next_uses_per_block(spill_ctx& ctx, unsigned block_idx, uint32_t& worklist)
}
uint32_t distance = pair.second.second;
uint32_t dom = pair.second.first;
- std::vector<unsigned>& preds = temp.is_linear() ? block->linear_preds : block->logical_preds;
+ Block::edge_vec& preds = temp.is_linear() ? block->linear_preds : block->logical_preds;
for (unsigned pred_idx : preds) {
if (ctx.program->blocks[pred_idx].loop_nest_depth > block->loop_nest_depth)
distance += 0xFFFF;
@@ -664,8 +664,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
/* keep variables spilled on all incoming paths */
for (const std::pair<const Temp, std::pair<uint32_t, uint32_t>>& pair : next_use_distances) {
- std::vector<unsigned>& preds =
- pair.first.is_linear() ? block->linear_preds : block->logical_preds;
+ Block::edge_vec& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds;
/* If it can be rematerialized, keep the variable spilled if all predecessors do not reload
* it. Otherwise, if any predecessor reloads it, ensure it's reloaded on all other
* predecessors. The idea is that it's better in practice to rematerialize redundantly than to
@@ -708,7 +707,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
if (!phi->definitions[0].isTemp())
continue;
- std::vector<unsigned>& preds =
+ Block::edge_vec& preds =
phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
bool is_all_spilled = true;
for (unsigned i = 0; i < phi->operands.size(); i++) {
@@ -877,7 +876,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
continue;
}
- std::vector<unsigned>& preds =
+ Block::edge_vec& preds =
phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
uint32_t def_spill_id = ctx.spills_entry[block_idx][phi->definitions[0].getTemp()];
@@ -945,8 +944,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
/* iterate all (other) spilled variables for which to spill at the predecessor */
// TODO: would be better to have them sorted: first vgprs and first with longest distance
for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block_idx]) {
- std::vector<unsigned> preds =
- pair.first.is_linear() ? block->linear_preds : block->logical_preds;
+ Block::edge_vec& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds;
for (unsigned pred_idx : preds) {
/* variable is already spilled at predecessor */
@@ -1000,7 +998,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
assert(!phi->definitions[0].isTemp() ||
!ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp()));
- std::vector<unsigned>& preds =
+ Block::edge_vec& preds =
phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
for (unsigned i = 0; i < phi->operands.size(); i++) {
if (!phi->operands[i].isTemp())
@@ -1060,8 +1058,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
/* skip spilled variables */
if (ctx.spills_entry[block_idx].count(pair.first))
continue;
- std::vector<unsigned> preds =
- pair.first.is_linear() ? block->linear_preds : block->logical_preds;
+ Block::edge_vec& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds;
/* variable is dead at predecessor, it must be from a phi */
bool is_dead = false;
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp
index 477b59ba6be..43ccb5c250e 100644
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -68,7 +68,7 @@ collect_phi_info(ssa_elimination_ctx& ctx)
assert(phi->definitions[0].size() == phi->operands[i].size());
- std::vector<unsigned>& preds =
+ Block::edge_vec& preds =
phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds;
uint32_t pred_idx = preds[i];
auto& info_vec = phi->opcode == aco_opcode::p_phi ? ctx.logical_phi_info[pred_idx]