diff options
author | Jorge Zapata <jorgeluis.zapata@gmail.com> | 2024-01-30 13:43:36 +0100 |
---|---|---|
committer | Jorge Zapata <jorgeluis.zapata@gmail.com> | 2024-03-12 10:03:58 +0100 |
commit | ad63c197a8990efdff2338447dd6d6012c2f7046 (patch) | |
tree | 0f1eda920261104a78776abd24843f4ce0645a83 | |
parent | e92ad6d5c0fd699e8d39d3756e5fee09626991ef (diff) |
Port the AVX instruction reordering to X86
Part-of: <https://gitlab.freedesktop.org/gstreamer/orc/-/merge_requests/148>
-rw-r--r-- | orc/orcprogram-avx.c | 148 | ||||
-rw-r--r-- | orc/orcprogram-x86.c | 151 |
2 files changed, 149 insertions, 150 deletions
diff --git a/orc/orcprogram-avx.c b/orc/orcprogram-avx.c index fe64b6a..90f0e31 100644 --- a/orc/orcprogram-avx.c +++ b/orc/orcprogram-avx.c @@ -421,151 +421,3 @@ orc_avx_init (void) t = orc_x86_register_target (&target); orc_compiler_avx_register_rules (t); } - -/* - * The following code was ported from the MIPS backend, - * and extended to allow for store reordering and the - * multi-operand VEX syntax. - */ -static int -uses_in_destination_register (const OrcCompiler *const compiler, - const OrcInstruction *const insn, - int reg) -{ - for (int i=0; i<ORC_STATIC_OPCODE_N_DEST; i++) { - const OrcVariable *const var = compiler->vars + insn->dest_args[i]; - if (var->alloc == reg || var->ptr_register == reg) - return TRUE; - } - - return FALSE; -} - -static int uses_in_source_register(const OrcCompiler *const compiler, - const OrcInstruction *const insn, - int reg) { - for (int i=0; i<ORC_STATIC_OPCODE_N_SRC; i++) { - const OrcVariable *const var = compiler->vars + insn->src_args[i]; - if (var->alloc == reg || var->ptr_register == reg) - return TRUE; - } - - return FALSE; -} - -static void -do_swap (int *tab, int i, int j) -{ - int tmp = tab[i]; - tab[i] = tab[j]; - tab[j] = tmp; -} - -/* Assumes that the instruction at indexes[i] is a load instruction */ -static int -can_raise (const OrcCompiler *const compiler, const int *const indexes, int i) -{ - if (i==0) - return FALSE; - - const OrcInstruction *const insn = compiler->insns + indexes[i]; - const OrcInstruction *const previous_insn = compiler->insns + indexes[i-1]; - - /* Register where the load operation will put the data */ - const int reg = compiler->vars[insn->dest_args[0]].alloc; - if (uses_in_source_register(compiler, previous_insn, reg) || uses_in_destination_register(compiler, previous_insn, reg)) { - return FALSE; - } - - for (int j = 0; j < ORC_STATIC_OPCODE_N_SRC; j++) { - const OrcVariable *const var = compiler->vars + insn->src_args[j]; - // If the previous instruction touches anything RIP - if (uses_in_destination_register(compiler, previous_insn, var->alloc) || uses_in_destination_register(compiler, previous_insn, var->ptr_register)) - return FALSE; - } - - return TRUE; -} - -/* Recursive. */ -static void -try_raise (OrcCompiler *compiler, int *indexes, int i) -{ - if (can_raise (compiler, indexes, i)) { - do_swap (indexes, i-1, i); - try_raise (compiler, indexes, i-1); - } -} - - -/* Assumes that the instruction at indexes[i] is a load instruction */ -static int -can_lower (const OrcCompiler *const compiler, const int *const indexes, int i) -{ - if (i >= compiler->n_insns - 1) - return FALSE; - - const OrcInstruction *const insn = compiler->insns + indexes[i]; - const OrcInstruction *const next_insn = compiler->insns + indexes[i+1]; - - /* Register where the store operation will put the data */ - const int reg = compiler->vars[insn->dest_args[0]].ptr_register; - if (uses_in_source_register(compiler, next_insn, reg)) { - return FALSE; - } - - for (int j = 0; j < ORC_STATIC_OPCODE_N_SRC; j++) { - const OrcVariable *const var = compiler->vars + insn->src_args[j]; - // If the next instruction touches anything RIP - if (uses_in_destination_register(compiler, next_insn, var->alloc) || uses_in_destination_register(compiler, next_insn, var->ptr_register)) - return FALSE; - } - - return TRUE; -} - -static void -try_lower (OrcCompiler *compiler, int *indexes, int i) -{ - if (can_lower (compiler, indexes, i)) { - do_swap (indexes, i-1, i); - try_lower (compiler, indexes, i+1); - } -} - -/* - Do a kind of bubble sort, though it might not exactly be a sort. It only - moves load instructions up until they reach an operation above which they - cannot go. - - FIXME: also push store instructions down. - */ -static void -optimise_order (OrcCompiler *compiler, int *const indexes) -{ - for (int i=0; i<compiler->n_insns; i++) { - const OrcInstruction *const insn = compiler->insns + indexes[i]; - if (insn->opcode->flags & ORC_STATIC_OPCODE_LOAD) { - try_raise(compiler, indexes, i); - } - else if (insn->opcode->flags & ORC_STATIC_OPCODE_STORE) { - try_lower(compiler, indexes, i); - } - } -} - -static int * -get_optimised_instruction_order (OrcCompiler *compiler) -{ - if (compiler->n_insns == 0) - return NULL; - - int *const instruction_idx = malloc (compiler->n_insns * sizeof(int)); - for (int i=0; i<compiler->n_insns; i++) - instruction_idx[i] = i; - - optimise_order (compiler, instruction_idx); - - return instruction_idx; -} - diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c index aa2baef..30c0289 100644 --- a/orc/orcprogram-x86.c +++ b/orc/orcprogram-x86.c @@ -66,7 +66,8 @@ orc_x86_compiler_max_loop_shift (OrcX86Target *t, OrcCompiler *c) c->loop_shift = i; } -void orc_x86_compiler_init (OrcCompiler *c) +static void +orc_x86_compiler_init (OrcCompiler *c) { OrcX86Target *t; int i; @@ -539,6 +540,151 @@ orc_x86_emit_split_2_regions (OrcX86Target *t, OrcCompiler *compiler) (int)ORC_STRUCT_OFFSET (OrcExecutor, counter3), compiler->exec_reg); } +/* + * The following code was ported from the MIPS backend, + * and extended to allow for store reordering and the + * multi-operand VEX syntax. + */ +static int +uses_in_destination_register (const OrcCompiler *const compiler, + const OrcInstruction *const insn, + int reg) +{ + for (int i=0; i<ORC_STATIC_OPCODE_N_DEST; i++) { + const OrcVariable *const var = compiler->vars + insn->dest_args[i]; + if (var->alloc == reg || var->ptr_register == reg) + return TRUE; + } + + return FALSE; +} + +static int uses_in_source_register(const OrcCompiler *const compiler, + const OrcInstruction *const insn, + int reg) { + for (int i=0; i<ORC_STATIC_OPCODE_N_SRC; i++) { + const OrcVariable *const var = compiler->vars + insn->src_args[i]; + if (var->alloc == reg || var->ptr_register == reg) + return TRUE; + } + + return FALSE; +} + +static void +do_swap (int *tab, int i, int j) +{ + int tmp = tab[i]; + tab[i] = tab[j]; + tab[j] = tmp; +} + +/* Assumes that the instruction at indexes[i] is a load instruction */ +static int +can_raise (const OrcCompiler *const compiler, const int *const indexes, int i) +{ + if (i==0) + return FALSE; + + const OrcInstruction *const insn = compiler->insns + indexes[i]; + const OrcInstruction *const previous_insn = compiler->insns + indexes[i-1]; + + /* Register where the load operation will put the data */ + const int reg = compiler->vars[insn->dest_args[0]].alloc; + if (uses_in_source_register(compiler, previous_insn, reg) || uses_in_destination_register(compiler, previous_insn, reg)) { + return FALSE; + } + + for (int j = 0; j < ORC_STATIC_OPCODE_N_SRC; j++) { + const OrcVariable *const var = compiler->vars + insn->src_args[j]; + // If the previous instruction touches anything RIP + if (uses_in_destination_register(compiler, previous_insn, var->alloc) || uses_in_destination_register(compiler, previous_insn, var->ptr_register)) + return FALSE; + } + + return TRUE; +} + +/* Recursive. */ +static void +try_raise (OrcCompiler *compiler, int *indexes, int i) +{ + if (can_raise (compiler, indexes, i)) { + do_swap (indexes, i-1, i); + try_raise (compiler, indexes, i-1); + } +} + + +/* Assumes that the instruction at indexes[i] is a load instruction */ +static int +can_lower (const OrcCompiler *const compiler, const int *const indexes, int i) +{ + if (i >= compiler->n_insns - 1) + return FALSE; + + const OrcInstruction *const insn = compiler->insns + indexes[i]; + const OrcInstruction *const next_insn = compiler->insns + indexes[i+1]; + + /* Register where the store operation will put the data */ + const int reg = compiler->vars[insn->dest_args[0]].ptr_register; + if (uses_in_source_register(compiler, next_insn, reg)) { + return FALSE; + } + + for (int j = 0; j < ORC_STATIC_OPCODE_N_SRC; j++) { + const OrcVariable *const var = compiler->vars + insn->src_args[j]; + // If the next instruction touches anything RIP + if (uses_in_destination_register(compiler, next_insn, var->alloc) || uses_in_destination_register(compiler, next_insn, var->ptr_register)) + return FALSE; + } + + return TRUE; +} + +static void +try_lower (OrcCompiler *compiler, int *indexes, int i) +{ + if (can_lower (compiler, indexes, i)) { + do_swap (indexes, i-1, i); + try_lower (compiler, indexes, i+1); + } +} + +/* + * Do a kind of bubble sort, though it might not exactly be a sort. It only + * moves load instructions up until they reach an operation above which they + * cannot go. + */ +static void +optimise_order (OrcCompiler *compiler, int *const indexes) +{ + for (int i=0; i<compiler->n_insns; i++) { + const OrcInstruction *const insn = compiler->insns + indexes[i]; + if (insn->opcode->flags & ORC_STATIC_OPCODE_LOAD) { + try_raise(compiler, indexes, i); + } + else if (insn->opcode->flags & ORC_STATIC_OPCODE_STORE) { + try_lower(compiler, indexes, i); + } + } +} + +static int * +get_optimised_instruction_order (OrcCompiler *compiler) +{ + if (compiler->n_insns == 0) + return NULL; + + int *const instruction_idx = malloc (compiler->n_insns * sizeof(int)); + for (int i=0; i<compiler->n_insns; i++) + instruction_idx[i] = i; + + optimise_order (compiler, instruction_idx); + + return instruction_idx; +} + static void orc_x86_emit_loop (OrcCompiler *compiler, int offset, int update) { @@ -547,9 +693,10 @@ orc_x86_emit_loop (OrcCompiler *compiler, int offset, int update) OrcRule *rule; int j; int k; + int *const insn_idx = get_optimised_instruction_order (compiler); for (j = 0; j < compiler->n_insns; j++) { - insn = compiler->insns + j; + insn = compiler->insns + insn_idx[j]; opcode = insn->opcode; compiler->insn_index = j; |