summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWim Taymans <wtaymans@redhat.com>2014-09-26 10:09:17 +0200
committerWim Taymans <wtaymans@redhat.com>2014-09-26 10:09:17 +0200
commit7186fbaad7b2d5fe62781ae33196e00a35edc03a (patch)
tree3d73b3a0a05abadc29cbb59ad29e2a011e390e6d
parent9bf0679ede472ef9b1a9bc3365d7fd7ef06f1356 (diff)
sse: improve splitXX on sse3
We can implement splitXX with byte shuffles (pshufb) on SSSE3
-rw-r--r--orc/orcrules-sse.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index b9a6d3a..ea9a5c6 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -1958,6 +1958,51 @@ sse_rule_swapq_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn)
}
+/*
+ * splitlw rule using pshufb: one source register is split into two
+ * destination registers, each produced by a byte shuffle of the source.
+ *
+ * Reading the mask words as little-endian byte indices (TODO confirm the
+ * layout produced by orc_compiler_try_get_constant_long), the dest1 mask
+ * picks bytes 2,3,6,7,10,11,14,15 (upper 16 bits of every 32-bit element)
+ * and the dest2 mask picks bytes 0,1,4,5,8,9,12,13 (lower 16 bits).
+ *
+ * Falls back to sse_rule_splitlw when either mask constant cannot be
+ * obtained.
+ */
static void
+sse_rule_splitlw_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[0]].alloc;
+  int dest1 = p->vars[insn->dest_args[0]].alloc;
+  int dest2 = p->vars[insn->dest_args[1]].alloc;
+  int tmp1, tmp2;
+
+  tmp1 = orc_compiler_try_get_constant_long (p,
+      0x07060302, 0x0f0e0b0a, 0x07060302, 0x0f0e0b0a);
+  tmp2 = orc_compiler_try_get_constant_long (p,
+      0x05040100, 0x0d0c0908, 0x05040100, 0x0d0c0908);
+  if (tmp1 != ORC_REG_INVALID && tmp2 != ORC_REG_INVALID) {
+    /* pshufb permutes its destination in place, so both destinations must
+     * hold a copy of the source before either shuffle is emitted.  Either
+     * destination may share a register with src; doing all copies first
+     * guarantees the source is still intact when it is copied. */
+    if (dest2 != src)
+      orc_sse_emit_movdqa (p, src, dest2);
+    if (dest1 != src)
+      orc_sse_emit_movdqa (p, src, dest1);
+    orc_sse_emit_pshufb (p, tmp1, dest1);
+    orc_sse_emit_pshufb (p, tmp2, dest2);
+  } else {
+    /* No register available for the shuffle masks: use the generic rule. */
+    sse_rule_splitlw (p, user, insn);
+  }
+}
+
+
+/*
+ * splitwb rule using pshufb: one source register is split into two
+ * destination registers, each produced by a byte shuffle of the source.
+ *
+ * Reading the mask words as little-endian byte indices (TODO confirm the
+ * layout produced by orc_compiler_try_get_constant_long), the dest1 mask
+ * picks the odd bytes 1,3,5,...,15 (upper byte of every 16-bit element)
+ * and the dest2 mask picks the even bytes 0,2,4,...,14 (lower byte).
+ *
+ * Falls back to sse_rule_splitwb when either mask constant cannot be
+ * obtained.
+ */
+static void
+sse_rule_splitwb_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[0]].alloc;
+  int dest1 = p->vars[insn->dest_args[0]].alloc;
+  int dest2 = p->vars[insn->dest_args[1]].alloc;
+  int tmp1, tmp2;
+
+  tmp1 = orc_compiler_try_get_constant_long (p,
+      0x07050301, 0x0f0d0b09, 0x07050301, 0x0f0d0b09);
+  tmp2 = orc_compiler_try_get_constant_long (p,
+      0x06040200, 0x0e0c0a08, 0x06040200, 0x0e0c0a08);
+  if (tmp1 != ORC_REG_INVALID && tmp2 != ORC_REG_INVALID) {
+    /* pshufb permutes its destination in place, so both destinations must
+     * hold a copy of the source before either shuffle is emitted.  Either
+     * destination may share a register with src; doing all copies first
+     * guarantees the source is still intact when it is copied. */
+    if (dest2 != src)
+      orc_sse_emit_movdqa (p, src, dest2);
+    if (dest1 != src)
+      orc_sse_emit_movdqa (p, src, dest1);
+    orc_sse_emit_pshufb (p, tmp1, dest1);
+    orc_sse_emit_pshufb (p, tmp2, dest2);
+  } else {
+    /* No register available for the shuffle masks: use the generic rule. */
+    sse_rule_splitwb (p, user, insn);
+  }
+}
+
+static void
sse_rule_select0lw_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn)
{
int dest = p->vars[insn->dest_args[0]].alloc;
@@ -2933,6 +2978,8 @@ orc_compiler_sse_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "swapl", sse_rule_swapl_ssse3, NULL);
orc_rule_register (rule_set, "swapwl", sse_rule_swapwl_ssse3, NULL);
orc_rule_register (rule_set, "swapq", sse_rule_swapq_ssse3, NULL);
+ orc_rule_register (rule_set, "splitlw", sse_rule_splitlw_ssse3, NULL);
+ orc_rule_register (rule_set, "splitwb", sse_rule_splitwb_ssse3, NULL);
orc_rule_register (rule_set, "select0lw", sse_rule_select0lw_ssse3, NULL);
orc_rule_register (rule_set, "select1lw", sse_rule_select1lw_ssse3, NULL);
orc_rule_register (rule_set, "select0wb", sse_rule_select0wb_ssse3, NULL);