summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Wim Taymans <wtaymans@redhat.com> 2014-12-09 09:21:58 +0100
committer Wim Taymans <wtaymans@redhat.com> 2014-12-09 09:21:58 +0100
commit 4221fa97a594cdad4c704e9afe0e9fb6dac10baf (patch)
tree 874981e34450963bc9df6bf4f26616f6df287ac7
parent f1cfa5bba9824374d769e312381d8f5d85a417bc (diff)
add mulas4wlmulas
-rw-r--r-- orc/orcbytecodes.h 3
-rw-r--r-- orc/orcemulateopcodes.c 37
-rw-r--r-- orc/orcemulateopcodes.h 1
-rw-r--r-- orc/orcopcodes.c 2
-rw-r--r-- orc/orcprogram-c.c 20
-rw-r--r-- orc/orcrules-sse.c 22
6 files changed, 84 insertions, 1 deletions
diff --git a/orc/orcbytecodes.h b/orc/orcbytecodes.h
index cea1ff4..6eb9a6a 100644
--- a/orc/orcbytecodes.h
+++ b/orc/orcbytecodes.h
@@ -250,6 +250,7 @@ typedef enum {
ORC_BC_convld,
ORC_BC_convfd,
ORC_BC_convdf,
- /* 226 */
+ ORC_BC_mulas4wl,
+ /* 227 */
ORC_BC_LAST
} OrcBytecodes;
diff --git a/orc/orcemulateopcodes.c b/orc/orcemulateopcodes.c
index e80fce2..67566c0 100644
--- a/orc/orcemulateopcodes.c
+++ b/orc/orcemulateopcodes.c
@@ -5371,3 +5371,40 @@ emulate_convdf (OrcOpcodeExecutor *ex, int offset, int n)
}
+/*
+ * Emulation of the mulas4wl opcode.
+ *
+ * For each of the n elements, reads one 64-bit value from each of the two
+ * source arrays, multiplies the four x4[] lanes pairwise and stores the sum
+ * of the four products as one 32-bit value in the destination array.
+ *
+ * ex:     executor providing dest_ptrs[0], src_ptrs[0] and src_ptrs[1]
+ * offset: not used by this function
+ * n:      number of elements to process
+ */
+void
+emulate_mulas4wl (OrcOpcodeExecutor *ex, int offset, int n)
+{
+  int i;
+  orc_union32 * ORC_RESTRICT ptr0;
+  const orc_union64 * ORC_RESTRICT ptr4;
+  const orc_union64 * ORC_RESTRICT ptr5;
+  orc_union64 var32;
+  orc_union64 var33;
+  orc_union32 var34;
+
+  ptr0 = (orc_union32 *)ex->dest_ptrs[0];
+  ptr4 = (orc_union64 *)ex->src_ptrs[0];
+  ptr5 = (orc_union64 *)ex->src_ptrs[1];
+
+
+  for (i = 0; i < n; i++) {
+    /* 0: loadq */
+    var32 = ptr4[i];
+    /* 1: loadq */
+    var33 = ptr5[i];
+    /* 2: mulas4wl */
+    {
+      orc_union64 _src1, _src2;
+      _src1.i = var32.i;
+      _src2.i = var33.i;
+      /* Each product is formed in int, but the four products are summed in
+       * unsigned arithmetic so that the total wraps around instead of
+       * overflowing a signed int, which is undefined behaviour (for example
+       * when every lane holds the most negative value).
+       * NOTE(review): assumes the x4[] lanes are 16 bits wide and var34.i is
+       * 32 bits wide -- confirm against the orc_union64/orc_union32
+       * definitions. */
+      var34.i = (unsigned int)(_src1.x4[0] * _src2.x4[0]) +
+          (unsigned int)(_src1.x4[1] * _src2.x4[1]) +
+          (unsigned int)(_src1.x4[2] * _src2.x4[2]) +
+          (unsigned int)(_src1.x4[3] * _src2.x4[3]);
+    }
+    /* 3: storel */
+    ptr0[i] = var34;
+  }
+
+}
+
diff --git a/orc/orcemulateopcodes.h b/orc/orcemulateopcodes.h
index b02050e..2458d06 100644
--- a/orc/orcemulateopcodes.h
+++ b/orc/orcemulateopcodes.h
@@ -198,6 +198,7 @@ void emulate_convdl (OrcOpcodeExecutor *ex, int i, int n);
void emulate_convld (OrcOpcodeExecutor *ex, int i, int n);
void emulate_convfd (OrcOpcodeExecutor *ex, int i, int n);
void emulate_convdf (OrcOpcodeExecutor *ex, int i, int n);
+void emulate_mulas4wl (OrcOpcodeExecutor *ex, int i, int n);
#endif
diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c
index 820f0c9..7710e25 100644
--- a/orc/orcopcodes.c
+++ b/orc/orcopcodes.c
@@ -499,6 +499,8 @@ static OrcStaticOpcode opcodes[] = {
{ "convfd", ORC_STATIC_OPCODE_FLOAT, { 8 }, { 4 }, emulate_convfd },
{ "convdf", ORC_STATIC_OPCODE_FLOAT, { 4 }, { 8 }, emulate_convdf },
+ { "mulas4wl", 0, { 4 }, { 8, 8 }, emulate_mulas4wl },
+
{ "" }
};
diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c
index bff075a..0798463 100644
--- a/orc/orcprogram-c.c
+++ b/orc/orcprogram-c.c
@@ -1478,6 +1478,25 @@ c_rule_mergebw (OrcCompiler *p, void *user, OrcInstruction *insn)
ORC_ASM_CODE(p, " }\n");
}
+/*
+ * C backend rule for the mulas4wl opcode.
+ *
+ * Emits, through ORC_ASM_CODE, a C block that copies both source operands
+ * into orc_union64 temporaries, multiplies their four x4[] lanes pairwise
+ * and assigns the sum of the four products to the destination.
+ *
+ * p:    compiler the generated text is appended to
+ * user: not used by this rule
+ * insn: instruction supplying dest_args[0], src_args[0] and src_args[1]
+ */
+static void
+c_rule_mulas4wl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  char dest[40], src1[40], src2[40];
+
+  c_get_name_int (dest, p, insn, insn->dest_args[0]);
+  c_get_name_int (src1, p, insn, insn->src_args[0]);
+  c_get_name_int (src2, p, insn, insn->src_args[1]);
+
+  ORC_ASM_CODE(p, " {\n");
+  ORC_ASM_CODE(p, " orc_union64 _src1, _src2;\n");
+  ORC_ASM_CODE(p, " _src1.i = %s;\n", src1);
+  ORC_ASM_CODE(p, " _src2.i = %s;\n", src2);
+  /* The generated expression sums the products in unsigned arithmetic so
+   * the total wraps around instead of overflowing a signed int, which would
+   * be undefined behaviour in the generated C.
+   * NOTE(review): assumes the x4[] lanes are 16 bits wide, so each single
+   * product still fits in an int -- confirm against orc_union64. */
+  ORC_ASM_CODE(p, " %s = (unsigned int)(_src1.x4[0] * _src2.x4[0]) +\n", dest);
+  ORC_ASM_CODE(p, " (unsigned int)(_src1.x4[1] * _src2.x4[1]) +\n");
+  ORC_ASM_CODE(p, " (unsigned int)(_src1.x4[2] * _src2.x4[2]) +\n");
+  ORC_ASM_CODE(p, " (unsigned int)(_src1.x4[3] * _src2.x4[3]);\n");
+  ORC_ASM_CODE(p, " }\n");
+}
static OrcTarget c_target = {
"c",
@@ -1599,5 +1618,6 @@ orc_c_init (void)
orc_rule_register (rule_set, "mergebw", c_rule_mergebw, NULL);
orc_rule_register (rule_set, "mergewl", c_rule_mergewl, NULL);
orc_rule_register (rule_set, "mergelq", c_rule_mergelq, NULL);
+ orc_rule_register (rule_set, "mulas4wl", c_rule_mulas4wl, NULL);
}
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index c2c13d5..25d439b 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -1244,6 +1244,26 @@ sse_rule_splatw3q (OrcCompiler *p, void *user, OrcInstruction *insn)
}
+/*
+ * SSE/MMX rule for the mulas4wl opcode: a pmaddwd multiply-add followed by
+ * a horizontal add of the resulting partial sums.
+ *
+ * Only src_args[1] is read here; the rule works in place on the register
+ * allocated to dest_args[0].
+ * NOTE(review): this presumably relies on the first source operand already
+ * being in the destination register -- confirm against the rule dispatcher.
+ * NOTE(review): phaddd is an SSSE3 instruction -- confirm this rule is
+ * registered in a rule set that requires SSSE3.
+ *
+ * p:    compiler the instructions are emitted to
+ * user: not used by this rule
+ * insn: instruction supplying dest_args[0] and src_args[1]
+ */
static void
+sse_rule_mulas4wl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  int src = p->vars[insn->src_args[1]].alloc;
+
+#ifndef MMX
+  orc_sse_emit_pmaddwd (p, src, dest);
+  orc_sse_emit_phaddd (p, dest, dest);
+#else
+  {
+    int tmp = orc_compiler_get_temp_reg (p);
+    orc_mmx_emit_pmaddwd (p, src, dest);
+    /* Fold the upper partial sum of the pmaddwd result onto the lower one:
+     * copy the result (dest, not src), shift the copy right by 32 bits and
+     * add it back into dest. */
+    orc_mmx_emit_movq (p, dest, tmp);
+    orc_mmx_emit_psrlq_imm (p, 32, tmp);
+    orc_mmx_emit_paddd (p, tmp, dest);
+  }
+#endif
+}
+
+static void
sse_rule_splatbw (OrcCompiler *p, void *user, OrcInstruction *insn)
{
int dest = p->vars[insn->dest_args[0]].alloc;
@@ -2958,6 +2978,8 @@ orc_compiler_sse_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "div255w", sse_rule_div255w, NULL);
orc_rule_register (rule_set, "divluw", sse_rule_divluw, NULL);
+ orc_rule_register (rule_set, "mulas4wl", sse_rule_mulas4wl, NULL);
+
/* SSE 3 -- no rules */
/* SSSE 3 */