diff options
author | L. E. Segovia <amy@centricular.com> | 2023-12-26 14:03:18 -0300 |
---|---|---|
committer | L. E. Segovia <amy@centricular.com> | 2023-12-26 14:03:18 -0300 |
commit | 5f84b1482cddce2e52a583b0c42f039b0408fa6f (patch) | |
tree | 47830358e5d6069367447a74484d93672b94f3db | |
parent | ea318e059ae638bf554cf6223134b50f3b6797b7 (diff) |
avx: Fix codegen of psrlq opcode and convql Orc instruction
This fixes volume_orc_process_int32 in GStreamer, also adding
it to the test.orc suite.
Part-of: <https://gitlab.freedesktop.org/gstreamer/orc/-/merge_requests/138>
-rw-r--r-- | orc/orcrules-avx.c | 4 | ||||
-rw-r--r-- | orc/orcx86insn.c | 1 | ||||
-rw-r--r-- | testsuite/test.orc | 9 |
3 files changed, 12 insertions, 2 deletions
diff --git a/orc/orcrules-avx.c b/orc/orcrules-avx.c
index 9964a2b..b41a980 100644
--- a/orc/orcrules-avx.c
+++ b/orc/orcrules-avx.c
@@ -1153,14 +1153,14 @@ avx_rule_convql (OrcCompiler *p, void *user, OrcInstruction *insn)
   const int zero = orc_compiler_get_temp_constant (p, 4, 0);
   const int size = p->vars[insn->src_args[0]].size << p->loop_shift;
 
-  if (size >= 16) {
+  if (size >= 32) {
     orc_avx_emit_pshufd (p, ORC_AVX_SSE_SHUF (2, 0, 2, 0), src, dest);
     orc_avx_emit_punpcklqdq (p, dest, zero, dest);
     // same as above
     orc_avx_emit_permute4x64_imm (p, ORC_AVX_SSE_SHUF (3, 1, 2, 0), dest, dest);
   } else {
     orc_avx_sse_emit_pshufd (p, ORC_AVX_SSE_SHUF (2, 0, 2, 0), src, dest);
-    orc_avx_emit_punpcklqdq (p, dest, zero, dest);
+    orc_avx_sse_emit_punpcklqdq (p, dest, zero, dest);
   }
 }
 
diff --git a/orc/orcx86insn.c b/orc/orcx86insn.c
index 5d46dd1..6ec4009 100644
--- a/orc/orcx86insn.c
+++ b/orc/orcx86insn.c
@@ -1277,6 +1277,7 @@ get_vex_vvvv (OrcCompiler *p, const OrcX86Insn *const xinsn)
   switch (xinsn->opcode_index) {
     case ORC_X86_pslldq_imm:
     case ORC_X86_psrldq_imm:
+    case ORC_X86_psrlq_imm:
     case ORC_X86_psrlw_imm:
     case ORC_X86_psrld_imm:
     case ORC_X86_psraw_imm:
diff --git a/testsuite/test.orc b/testsuite/test.orc
index 85892c8..3e9c579 100644
--- a/testsuite/test.orc
+++ b/testsuite/test.orc
@@ -2797,3 +2797,12 @@ convssswb g, wg
 mergebw wr, r, g
 mergewl x, wr, wb
 x4 addb argb, x, c128
+
+.function volume_orc_process_int32
+.dest 4 d1 orc_int32
+.param 4 p1
+.temp 8 t1
+
+mulslq t1, d1, p1
+shrsq t1, t1, 27
+convql d1, t1