diff options
author | L. E. Segovia <amy@centricular.com> | 2024-01-27 13:49:14 -0300 |
---|---|---|
committer | GStreamer Marge Bot <gitlab-merge-bot@gstreamer-foundation.org> | 2024-02-02 13:53:43 +0000 |
commit | 913211a8bd5ebd86702be010157d5b89b3243123 (patch) | |
tree | 584e273738398bfc05bb35ab6a41c560e56f7c54 | |
parent | 98cc9d23e8c4ed30a5f7e86a619db259f6f209f1 (diff) |
mmx: Fix mulll using the wrong size for the movs
Part-of: <https://gitlab.freedesktop.org/gstreamer/orc/-/merge_requests/158>
-rw-r--r-- | orc/orcrules-mmx.c | 15 | ||||
-rw-r--r-- | orc/orcrules-sse.c | 15 |
2 files changed, 20 insertions, 10 deletions
```diff
diff --git a/orc/orcrules-mmx.c b/orc/orcrules-mmx.c
index d04dfc4..820bbd6 100644
--- a/orc/orcrules-mmx.c
+++ b/orc/orcrules-mmx.c
@@ -13,6 +13,11 @@
 #include <orc/orcmmx.h>
 #define MMX 1
+#ifdef MMX
+# define ORC_REG_SIZE 8
+#else
+# define ORC_REG_SIZE 16
+#endif
 #define SIZE 65536
 /* sse rules */
@@ -1666,21 +1671,21 @@ mmx_rule_mulll_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
 {
   const int offset = ORC_STRUCT_OFFSET (OrcExecutor, arrays[ORC_VAR_T1]);
-  orc_x86_emit_mov_mmx_memoffset (p, 16, p->vars[insn->src_args[0]].alloc,
+  orc_x86_emit_mov_mmx_memoffset (p, ORC_REG_SIZE, p->vars[insn->src_args[0]].alloc,
       offset, p->exec_reg, FALSE, FALSE);
-  orc_x86_emit_mov_mmx_memoffset (p, 16, p->vars[insn->src_args[1]].alloc,
-      offset + 16, p->exec_reg, FALSE, FALSE);
+  orc_x86_emit_mov_mmx_memoffset (p, ORC_REG_SIZE, p->vars[insn->src_args[1]].alloc,
+      offset + ORC_REG_SIZE, p->exec_reg, FALSE, FALSE);
   for (int i = 0; i < (1 << p->insn_shift); i++) {
     orc_x86_emit_mov_memoffset_reg (p, 4, offset + 4 * i, p->exec_reg,
         p->gp_tmpreg);
-    orc_x86_emit_imul_memoffset_reg (p, 4, offset + 16 + 4 * i, p->exec_reg,
+    orc_x86_emit_imul_memoffset_reg (p, 4, offset + ORC_REG_SIZE + 4 * i, p->exec_reg,
         p->gp_tmpreg);
     orc_x86_emit_mov_reg_memoffset (p, 4, p->gp_tmpreg, offset + 4 * i,
         p->exec_reg);
   }
-  orc_x86_emit_mov_memoffset_mmx (p, 16, offset, p->exec_reg,
+  orc_x86_emit_mov_memoffset_mmx (p, ORC_REG_SIZE, offset, p->exec_reg,
       p->vars[insn->dest_args[0]].alloc, FALSE);
 }
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index 429b31b..9bdc8f3 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -13,6 +13,11 @@
 #include <orc/orcsse.h>
 #undef MMX
+#ifdef MMX
+# define ORC_REG_SIZE 8
+#else
+# define ORC_REG_SIZE 16
+#endif
 #define SIZE 65536
 /* sse rules */
@@ -1666,21 +1671,21 @@ sse_rule_mulll_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
 {
   const int offset = ORC_STRUCT_OFFSET (OrcExecutor, arrays[ORC_VAR_T1]);
-  orc_x86_emit_mov_sse_memoffset (p, 16, p->vars[insn->src_args[0]].alloc,
+  orc_x86_emit_mov_sse_memoffset (p, ORC_REG_SIZE, p->vars[insn->src_args[0]].alloc,
       offset, p->exec_reg, FALSE, FALSE);
-  orc_x86_emit_mov_sse_memoffset (p, 16, p->vars[insn->src_args[1]].alloc,
-      offset + 16, p->exec_reg, FALSE, FALSE);
+  orc_x86_emit_mov_sse_memoffset (p, ORC_REG_SIZE, p->vars[insn->src_args[1]].alloc,
+      offset + ORC_REG_SIZE, p->exec_reg, FALSE, FALSE);
   for (int i = 0; i < (1 << p->insn_shift); i++) {
     orc_x86_emit_mov_memoffset_reg (p, 4, offset + 4 * i, p->exec_reg,
         p->gp_tmpreg);
-    orc_x86_emit_imul_memoffset_reg (p, 4, offset + 16 + 4 * i, p->exec_reg,
+    orc_x86_emit_imul_memoffset_reg (p, 4, offset + ORC_REG_SIZE + 4 * i, p->exec_reg,
         p->gp_tmpreg);
     orc_x86_emit_mov_reg_memoffset (p, 4, p->gp_tmpreg, offset + 4 * i,
         p->exec_reg);
   }
-  orc_x86_emit_mov_memoffset_sse (p, 16, offset, p->exec_reg,
+  orc_x86_emit_mov_memoffset_sse (p, ORC_REG_SIZE, offset, p->exec_reg,
       p->vars[insn->dest_args[0]].alloc, FALSE);
 }
```