summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJorge Zapata <jorgeluis.zapata@gmail.com>2024-01-30 13:43:36 +0100
committerJorge Zapata <jorgeluis.zapata@gmail.com>2024-03-12 10:03:58 +0100
commitad63c197a8990efdff2338447dd6d6012c2f7046 (patch)
tree0f1eda920261104a78776abd24843f4ce0645a83
parente92ad6d5c0fd699e8d39d3756e5fee09626991ef (diff)
Port the AVX instruction reordering to X86
Part-of: <https://gitlab.freedesktop.org/gstreamer/orc/-/merge_requests/148>
-rw-r--r--orc/orcprogram-avx.c148
-rw-r--r--orc/orcprogram-x86.c151
2 files changed, 149 insertions, 150 deletions
diff --git a/orc/orcprogram-avx.c b/orc/orcprogram-avx.c
index fe64b6a..90f0e31 100644
--- a/orc/orcprogram-avx.c
+++ b/orc/orcprogram-avx.c
@@ -421,151 +421,3 @@ orc_avx_init (void)
t = orc_x86_register_target (&target);
orc_compiler_avx_register_rules (t);
}
-
-/*
- * The following code was ported from the MIPS backend,
- * and extended to allow for store reordering and the
- * multi-operand VEX syntax.
- */
-static int
-uses_in_destination_register (const OrcCompiler *const compiler,
- const OrcInstruction *const insn,
- int reg)
-{
- for (int i=0; i<ORC_STATIC_OPCODE_N_DEST; i++) {
- const OrcVariable *const var = compiler->vars + insn->dest_args[i];
- if (var->alloc == reg || var->ptr_register == reg)
- return TRUE;
- }
-
- return FALSE;
-}
-
-static int uses_in_source_register(const OrcCompiler *const compiler,
- const OrcInstruction *const insn,
- int reg) {
- for (int i=0; i<ORC_STATIC_OPCODE_N_SRC; i++) {
- const OrcVariable *const var = compiler->vars + insn->src_args[i];
- if (var->alloc == reg || var->ptr_register == reg)
- return TRUE;
- }
-
- return FALSE;
-}
-
-static void
-do_swap (int *tab, int i, int j)
-{
- int tmp = tab[i];
- tab[i] = tab[j];
- tab[j] = tmp;
-}
-
-/* Assumes that the instruction at indexes[i] is a load instruction */
-static int
-can_raise (const OrcCompiler *const compiler, const int *const indexes, int i)
-{
- if (i==0)
- return FALSE;
-
- const OrcInstruction *const insn = compiler->insns + indexes[i];
- const OrcInstruction *const previous_insn = compiler->insns + indexes[i-1];
-
- /* Register where the load operation will put the data */
- const int reg = compiler->vars[insn->dest_args[0]].alloc;
- if (uses_in_source_register(compiler, previous_insn, reg) || uses_in_destination_register(compiler, previous_insn, reg)) {
- return FALSE;
- }
-
- for (int j = 0; j < ORC_STATIC_OPCODE_N_SRC; j++) {
- const OrcVariable *const var = compiler->vars + insn->src_args[j];
- // If the previous instruction touches anything RIP
- if (uses_in_destination_register(compiler, previous_insn, var->alloc) || uses_in_destination_register(compiler, previous_insn, var->ptr_register))
- return FALSE;
- }
-
- return TRUE;
-}
-
-/* Recursive. */
-static void
-try_raise (OrcCompiler *compiler, int *indexes, int i)
-{
- if (can_raise (compiler, indexes, i)) {
- do_swap (indexes, i-1, i);
- try_raise (compiler, indexes, i-1);
- }
-}
-
-
-/* Assumes that the instruction at indexes[i] is a load instruction */
-static int
-can_lower (const OrcCompiler *const compiler, const int *const indexes, int i)
-{
- if (i >= compiler->n_insns - 1)
- return FALSE;
-
- const OrcInstruction *const insn = compiler->insns + indexes[i];
- const OrcInstruction *const next_insn = compiler->insns + indexes[i+1];
-
- /* Register where the store operation will put the data */
- const int reg = compiler->vars[insn->dest_args[0]].ptr_register;
- if (uses_in_source_register(compiler, next_insn, reg)) {
- return FALSE;
- }
-
- for (int j = 0; j < ORC_STATIC_OPCODE_N_SRC; j++) {
- const OrcVariable *const var = compiler->vars + insn->src_args[j];
- // If the next instruction touches anything RIP
- if (uses_in_destination_register(compiler, next_insn, var->alloc) || uses_in_destination_register(compiler, next_insn, var->ptr_register))
- return FALSE;
- }
-
- return TRUE;
-}
-
-static void
-try_lower (OrcCompiler *compiler, int *indexes, int i)
-{
- if (can_lower (compiler, indexes, i)) {
- do_swap (indexes, i-1, i);
- try_lower (compiler, indexes, i+1);
- }
-}
-
-/*
- Do a kind of bubble sort, though it might not exactly be a sort. It only
- moves load instructions up until they reach an operation above which they
- cannot go.
-
- FIXME: also push store instructions down.
- */
-static void
-optimise_order (OrcCompiler *compiler, int *const indexes)
-{
- for (int i=0; i<compiler->n_insns; i++) {
- const OrcInstruction *const insn = compiler->insns + indexes[i];
- if (insn->opcode->flags & ORC_STATIC_OPCODE_LOAD) {
- try_raise(compiler, indexes, i);
- }
- else if (insn->opcode->flags & ORC_STATIC_OPCODE_STORE) {
- try_lower(compiler, indexes, i);
- }
- }
-}
-
-static int *
-get_optimised_instruction_order (OrcCompiler *compiler)
-{
- if (compiler->n_insns == 0)
- return NULL;
-
- int *const instruction_idx = malloc (compiler->n_insns * sizeof(int));
- for (int i=0; i<compiler->n_insns; i++)
- instruction_idx[i] = i;
-
- optimise_order (compiler, instruction_idx);
-
- return instruction_idx;
-}
-
diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c
index aa2baef..30c0289 100644
--- a/orc/orcprogram-x86.c
+++ b/orc/orcprogram-x86.c
@@ -66,7 +66,8 @@ orc_x86_compiler_max_loop_shift (OrcX86Target *t, OrcCompiler *c)
c->loop_shift = i;
}
-void orc_x86_compiler_init (OrcCompiler *c)
+static void
+orc_x86_compiler_init (OrcCompiler *c)
{
OrcX86Target *t;
int i;
@@ -539,6 +540,151 @@ orc_x86_emit_split_2_regions (OrcX86Target *t, OrcCompiler *compiler)
(int)ORC_STRUCT_OFFSET (OrcExecutor, counter3), compiler->exec_reg);
}
+/*
+ * The following code was ported from the MIPS backend,
+ * and extended to allow for store reordering and the
+ * multi-operand VEX syntax.
+ */
+static int
+uses_in_destination_register (const OrcCompiler *const compiler,
+ const OrcInstruction *const insn,
+ int reg)
+{
+ for (int i=0; i<ORC_STATIC_OPCODE_N_DEST; i++) {
+ const OrcVariable *const var = compiler->vars + insn->dest_args[i];
+ if (var->alloc == reg || var->ptr_register == reg)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static int uses_in_source_register(const OrcCompiler *const compiler,
+ const OrcInstruction *const insn,
+ int reg) {
+ for (int i=0; i<ORC_STATIC_OPCODE_N_SRC; i++) {
+ const OrcVariable *const var = compiler->vars + insn->src_args[i];
+ if (var->alloc == reg || var->ptr_register == reg)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void
+do_swap (int *tab, int i, int j)
+{
+ int tmp = tab[i];
+ tab[i] = tab[j];
+ tab[j] = tmp;
+}
+
+/* Assumes that the instruction at indexes[i] is a load instruction */
+static int
+can_raise (const OrcCompiler *const compiler, const int *const indexes, int i)
+{
+ if (i==0)
+ return FALSE;
+
+ const OrcInstruction *const insn = compiler->insns + indexes[i];
+ const OrcInstruction *const previous_insn = compiler->insns + indexes[i-1];
+
+ /* Register where the load operation will put the data */
+ const int reg = compiler->vars[insn->dest_args[0]].alloc;
+ if (uses_in_source_register(compiler, previous_insn, reg) || uses_in_destination_register(compiler, previous_insn, reg)) {
+ return FALSE;
+ }
+
+ for (int j = 0; j < ORC_STATIC_OPCODE_N_SRC; j++) {
+ const OrcVariable *const var = compiler->vars + insn->src_args[j];
+ // If the previous instruction touches anything RIP
+ if (uses_in_destination_register(compiler, previous_insn, var->alloc) || uses_in_destination_register(compiler, previous_insn, var->ptr_register))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/* Recursive. */
+static void
+try_raise (OrcCompiler *compiler, int *indexes, int i)
+{
+ if (can_raise (compiler, indexes, i)) {
+ do_swap (indexes, i-1, i);
+ try_raise (compiler, indexes, i-1);
+ }
+}
+
+
+/* Assumes that the instruction at indexes[i] is a load instruction */
+static int
+can_lower (const OrcCompiler *const compiler, const int *const indexes, int i)
+{
+ if (i >= compiler->n_insns - 1)
+ return FALSE;
+
+ const OrcInstruction *const insn = compiler->insns + indexes[i];
+ const OrcInstruction *const next_insn = compiler->insns + indexes[i+1];
+
+ /* Register where the store operation will put the data */
+ const int reg = compiler->vars[insn->dest_args[0]].ptr_register;
+ if (uses_in_source_register(compiler, next_insn, reg)) {
+ return FALSE;
+ }
+
+ for (int j = 0; j < ORC_STATIC_OPCODE_N_SRC; j++) {
+ const OrcVariable *const var = compiler->vars + insn->src_args[j];
+ // If the next instruction touches anything RIP
+ if (uses_in_destination_register(compiler, next_insn, var->alloc) || uses_in_destination_register(compiler, next_insn, var->ptr_register))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static void
+try_lower (OrcCompiler *compiler, int *indexes, int i)
+{
+ if (can_lower (compiler, indexes, i)) {
+ do_swap (indexes, i-1, i);
+ try_lower (compiler, indexes, i+1);
+ }
+}
+
+/*
+ * Do a kind of bubble sort, though it might not exactly be a sort. It only
+ * moves load instructions up until they reach an operation above which they
+ * cannot go.
+ */
+static void
+optimise_order (OrcCompiler *compiler, int *const indexes)
+{
+ for (int i=0; i<compiler->n_insns; i++) {
+ const OrcInstruction *const insn = compiler->insns + indexes[i];
+ if (insn->opcode->flags & ORC_STATIC_OPCODE_LOAD) {
+ try_raise(compiler, indexes, i);
+ }
+ else if (insn->opcode->flags & ORC_STATIC_OPCODE_STORE) {
+ try_lower(compiler, indexes, i);
+ }
+ }
+}
+
+static int *
+get_optimised_instruction_order (OrcCompiler *compiler)
+{
+ if (compiler->n_insns == 0)
+ return NULL;
+
+ int *const instruction_idx = malloc (compiler->n_insns * sizeof(int));
+ for (int i=0; i<compiler->n_insns; i++)
+ instruction_idx[i] = i;
+
+ optimise_order (compiler, instruction_idx);
+
+ return instruction_idx;
+}
+
static void
orc_x86_emit_loop (OrcCompiler *compiler, int offset, int update)
{
@@ -547,9 +693,10 @@ orc_x86_emit_loop (OrcCompiler *compiler, int offset, int update)
OrcRule *rule;
int j;
int k;
+ int *const insn_idx = get_optimised_instruction_order (compiler);
for (j = 0; j < compiler->n_insns; j++) {
- insn = compiler->insns + j;
+ insn = compiler->insns + insn_idx[j];
opcode = insn->opcode;
compiler->insn_index = j;