diff options
author | Wim Taymans <wtaymans@redhat.com> | 2014-12-09 09:21:58 +0100 |
---|---|---|
committer | Wim Taymans <wtaymans@redhat.com> | 2014-12-09 09:21:58 +0100 |
commit | 4221fa97a594cdad4c704e9afe0e9fb6dac10baf (patch) | |
tree | 874981e34450963bc9df6bf4f26616f6df287ac7 | |
parent | f1cfa5bba9824374d769e312381d8f5d85a417bc (diff) |
add mulas4wlmulas
-rw-r--r-- | orc/orcbytecodes.h | 3 | ||||
-rw-r--r-- | orc/orcemulateopcodes.c | 37 | ||||
-rw-r--r-- | orc/orcemulateopcodes.h | 1 | ||||
-rw-r--r-- | orc/orcopcodes.c | 2 | ||||
-rw-r--r-- | orc/orcprogram-c.c | 20 | ||||
-rw-r--r-- | orc/orcrules-sse.c | 22 |
6 files changed, 84 insertions, 1 deletions
diff --git a/orc/orcbytecodes.h b/orc/orcbytecodes.h
index cea1ff4..6eb9a6a 100644
--- a/orc/orcbytecodes.h
+++ b/orc/orcbytecodes.h
@@ -250,6 +250,7 @@ typedef enum {
   ORC_BC_convld,
   ORC_BC_convfd,
   ORC_BC_convdf,
-  /* 226 */
+  ORC_BC_mulas4wl,
+  /* 227 */
   ORC_BC_LAST
 } OrcBytecodes;
diff --git a/orc/orcemulateopcodes.c b/orc/orcemulateopcodes.c
index e80fce2..67566c0 100644
--- a/orc/orcemulateopcodes.c
+++ b/orc/orcemulateopcodes.c
@@ -5371,3 +5371,40 @@ emulate_convdf (OrcOpcodeExecutor *ex, int offset, int n)
 
 }
 
+void
+emulate_mulas4wl (OrcOpcodeExecutor *ex, int offset, int n)
+{
+  int i;
+  orc_union32 * ORC_RESTRICT ptr0;
+  const orc_union64 * ORC_RESTRICT ptr4;
+  const orc_union64 * ORC_RESTRICT ptr5;
+  orc_union64 var32;
+  orc_union64 var33;
+  orc_union32 var34;
+
+  ptr0 = (orc_union32 *)ex->dest_ptrs[0];
+  ptr4 = (orc_union64 *)ex->src_ptrs[0];
+  ptr5 = (orc_union64 *)ex->src_ptrs[1];
+
+
+  for (i = 0; i < n; i++) {
+    /* 0: loadq */
+    var32 = ptr4[i];
+    /* 1: loadq */
+    var33 = ptr5[i];
+    /* 2: mulas4wl */
+    {
+      orc_union64 _src1, _src2;
+      _src1.i = var32.i;
+      _src2.i = var33.i;
+      var34.i = (_src1.x4[0] * _src2.x4[0]) +
+         (_src1.x4[1] * _src2.x4[1]) +
+         (_src1.x4[2] * _src2.x4[2]) +
+         (_src1.x4[3] * _src2.x4[3]);
+    }
+    /* 3: storel */
+    ptr0[i] = var34;
+  }
+
+}
+
diff --git a/orc/orcemulateopcodes.h b/orc/orcemulateopcodes.h
index b02050e..2458d06 100644
--- a/orc/orcemulateopcodes.h
+++ b/orc/orcemulateopcodes.h
@@ -198,6 +198,7 @@ void emulate_convdl (OrcOpcodeExecutor *ex, int i, int n);
 void emulate_convld (OrcOpcodeExecutor *ex, int i, int n);
 void emulate_convfd (OrcOpcodeExecutor *ex, int i, int n);
 void emulate_convdf (OrcOpcodeExecutor *ex, int i, int n);
+void emulate_mulas4wl (OrcOpcodeExecutor *ex, int i, int n);
 
 #endif
 
diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c
index 820f0c9..7710e25 100644
--- a/orc/orcopcodes.c
+++ b/orc/orcopcodes.c
@@ -499,6 +499,8 @@ static OrcStaticOpcode opcodes[] = {
   { "convfd", ORC_STATIC_OPCODE_FLOAT, { 8 }, { 4 }, emulate_convfd },
   { "convdf", ORC_STATIC_OPCODE_FLOAT, { 4 }, { 8 }, emulate_convdf },
 
+  { "mulas4wl", 0, { 4 }, { 8, 8 }, emulate_mulas4wl },
+
   { "" }
 };
 
diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c
index bff075a..0798463 100644
--- a/orc/orcprogram-c.c
+++ b/orc/orcprogram-c.c
@@ -1478,6 +1478,25 @@ c_rule_mergebw (OrcCompiler *p, void *user, OrcInstruction *insn)
   ORC_ASM_CODE(p, " }\n");
 }
 
+static void
+c_rule_mulas4wl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  char dest[40], src1[40], src2[40];
+
+  c_get_name_int (dest, p, insn, insn->dest_args[0]);
+  c_get_name_int (src1, p, insn, insn->src_args[0]);
+  c_get_name_int (src2, p, insn, insn->src_args[1]);
+
+  ORC_ASM_CODE(p, " {\n");
+  ORC_ASM_CODE(p," orc_union64 _src1, _src2;\n");
+  ORC_ASM_CODE(p," _src1.i = %s;\n", src1);
+  ORC_ASM_CODE(p," _src2.i = %s;\n", src2);
+  ORC_ASM_CODE(p," %s = (_src1.x4[0] * _src2.x4[0]) +\n", dest);
+  ORC_ASM_CODE(p," (_src1.x4[1] * _src2.x4[1]) +\n");
+  ORC_ASM_CODE(p," (_src1.x4[2] * _src2.x4[2]) +\n");
+  ORC_ASM_CODE(p," (_src1.x4[3] * _src2.x4[3]);\n");
+  ORC_ASM_CODE(p, " }\n");
+}
 
 static OrcTarget c_target = {
   "c",
@@ -1599,5 +1618,6 @@ orc_c_init (void)
   orc_rule_register (rule_set, "mergebw", c_rule_mergebw, NULL);
   orc_rule_register (rule_set, "mergewl", c_rule_mergewl, NULL);
   orc_rule_register (rule_set, "mergelq", c_rule_mergelq, NULL);
+  orc_rule_register (rule_set, "mulas4wl", c_rule_mulas4wl, NULL);
 }
 
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index c2c13d5..25d439b 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -1244,6 +1244,26 @@ sse_rule_splatw3q (OrcCompiler *p, void *user, OrcInstruction *insn)
 }
 
 static void
+sse_rule_mulas4wl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  int src = p->vars[insn->src_args[1]].alloc; /* NOTE(review): src_args[0] is never read; presumably it already lives in dest -- confirm */
+
+#ifndef MMX
+  orc_sse_emit_pmaddwd (p, src, dest); /* multiply 16-bit lanes, add adjacent products into 32-bit lanes */
+  orc_sse_emit_phaddd (p, dest, dest); /* NOTE(review): phaddd is SSSE3, yet this rule is registered before the "SSSE 3" section -- confirm */
+#else
+  {
+    int tmp = orc_compiler_get_temp_reg (p);
+    orc_mmx_emit_pmaddwd (p, src, dest); /* dest now holds two 32-bit pair sums */
+    orc_mmx_emit_movq (p, dest, tmp); /* copy the pair sums (not src) so the high one can be folded in */
+    orc_mmx_emit_psrlq_imm (p, 32, tmp); /* move the high pair sum down to the low 32 bits */
+    orc_mmx_emit_paddd (p, tmp, dest); /* low 32 bits of dest = sum of all four products */
+  }
+#endif
+}
+
+static void
 sse_rule_splatbw (OrcCompiler *p, void *user, OrcInstruction *insn)
 {
   int dest = p->vars[insn->dest_args[0]].alloc;
@@ -2958,6 +2978,8 @@ orc_compiler_sse_register_rules (OrcTarget *target)
   orc_rule_register (rule_set, "div255w", sse_rule_div255w, NULL);
   orc_rule_register (rule_set, "divluw", sse_rule_divluw, NULL);
 
+  orc_rule_register (rule_set, "mulas4wl", sse_rule_mulas4wl, NULL);
+
   /* SSE 3 -- no rules */
 
   /* SSSE 3 */