diff options
author | David Schleef <ds@schleef.org> | 2010-09-08 12:28:10 -0700 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2010-09-08 12:28:10 -0700 |
commit | 8e1f83c4cbf088855a777eb0d59e1351042b5493 (patch) | |
tree | a7cc6581f80855cf5a476052b83cd202365648f0 | |
parent | 15a8a11ff898f35b4be3dd9f8779e00c665c90dd (diff) |
sse: Reenable rules that use pshufb
-rw-r--r-- | orc/orcrules-sse.c | 64 |
1 file changed, 29 insertions, 35 deletions
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 2b69b2d..c66511f 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -1500,45 +1500,38 @@ sse_rule_swapq (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_por (p, tmp, dest); } -#define LOAD_MASK_IS_SLOW -#ifndef LOAD_MASK_IS_SLOW +#ifndef MMX static void -sse_emit_load_mask (OrcCompiler *p, unsigned int mask1, unsigned int mask2) +sse_rule_swapw_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) { - int tmp = orc_compiler_get_temp_reg (p); - int gptmp = p->gp_tmpreg; - int tmp2 = orc_compiler_get_temp_reg (p); + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp; - orc_x86_emit_mov_imm_reg (p, 4, mask1, gptmp); - orc_x86_emit_mov_reg_sse (p, gptmp, tmp); - orc_sse_emit_pshufd (p, 0, tmp, tmp); - orc_x86_emit_mov_imm_reg (p, 4, mask2, gptmp); - orc_x86_emit_mov_reg_sse (p, gptmp, tmp2); - orc_sse_emit_punpcklbw (p, tmp2, tmp2); - orc_sse_emit_punpcklwd (p, tmp2, tmp2); - orc_sse_emit_paddb (p, tmp2, tmp); + tmp = orc_compiler_get_constant_long (p, + 0x02030001, 0x06070405, 0x0a0b0809, 0x0e0f0c0d); + orc_sse_emit_pshufb (p, tmp, dest); } static void -sse_rule_swapw_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) +sse_rule_swapl_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) { - int src = p->vars[insn->src_args[0]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - int tmp = orc_compiler_get_temp_reg (p); + int tmp; - sse_emit_load_mask (p, 0x02030001, 0x0c080400); + tmp = orc_compiler_get_constant_long (p, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); orc_sse_emit_pshufb (p, tmp, dest); } static void -sse_rule_swapl_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) +sse_rule_swapq_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) { - int src = p->vars[insn->src_args[0]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - int tmp = orc_compiler_get_temp_reg (p); + int tmp; - sse_emit_load_mask (p, 0x00010203, 0x0c080400); + tmp = 
orc_compiler_get_constant_long (p, + 0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b); orc_sse_emit_pshufb (p, tmp, dest); } @@ -1546,11 +1539,11 @@ sse_rule_swapl_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_rule_select0lw_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) { - int src = p->vars[insn->src_args[0]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - int tmp = orc_compiler_get_temp_reg (p); + int tmp; - sse_emit_load_mask (p, 0x05040100, 0x08000800); + tmp = orc_compiler_get_constant_long (p, + 0x05040100, 0x0d0c0908, 0x05040100, 0x0d0c0908); orc_sse_emit_pshufb (p, tmp, dest); } @@ -1558,11 +1551,11 @@ sse_rule_select0lw_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_rule_select1lw_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) { - int src = p->vars[insn->src_args[0]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - int tmp = orc_compiler_get_temp_reg (p); + int tmp; - sse_emit_load_mask (p, 0x07060302, 0x08000800); + tmp = orc_compiler_get_constant_long (p, + 0x07060302, 0x0f0e0b0a, 0x07060302, 0x0f0e0b0a); orc_sse_emit_pshufb (p, tmp, dest); } @@ -1570,11 +1563,11 @@ sse_rule_select1lw_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_rule_select0wb_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) { - int src = p->vars[insn->src_args[0]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - int tmp = orc_compiler_get_temp_reg (p); + int tmp; - sse_emit_load_mask (p, 0x06040200, 0x08000800); + tmp = orc_compiler_get_constant_long (p, + 0x06040200, 0x0e0c0a08, 0x06040200, 0x0e0c0a08); orc_sse_emit_pshufb (p, tmp, dest); } @@ -1582,11 +1575,11 @@ sse_rule_select0wb_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_rule_select1wb_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) { - int src = p->vars[insn->src_args[0]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - int tmp = orc_compiler_get_temp_reg (p); 
+ int tmp; - sse_emit_load_mask (p, 0x07050301, 0x08000800); + tmp = orc_compiler_get_constant_long (p, + 0x07050301, 0x0f0d0b09, 0x07050301, 0x0f0d0b09); orc_sse_emit_pshufb (p, tmp, dest); } @@ -2468,9 +2461,10 @@ orc_compiler_sse_register_rules (OrcTarget *target) REG(absb); REG(absw); REG(absl); -#ifndef LOAD_MASK_IS_SLOW +#ifndef MMX orc_rule_register (rule_set, "swapw", sse_rule_swapw_ssse3, NULL); orc_rule_register (rule_set, "swapl", sse_rule_swapl_ssse3, NULL); + orc_rule_register (rule_set, "swapq", sse_rule_swapq_ssse3, NULL); orc_rule_register (rule_set, "select0lw", sse_rule_select0lw_ssse3, NULL); orc_rule_register (rule_set, "select1lw", sse_rule_select1lw_ssse3, NULL); orc_rule_register (rule_set, "select0wb", sse_rule_select0wb_ssse3, NULL); |