summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWim Taymans <wtaymans@redhat.com>2014-11-26 16:58:23 +0100
committerWim Taymans <wtaymans@redhat.com>2014-11-26 16:58:23 +0100
commit3ff0aa072b74a9f6e8f67ff63547dde591a845cc (patch)
treecc8256da5c61b4987cb9adc8cf89c2744e7964de
parent85f79a8a4ecf6f22e5c334d6ad0222502e37a5eb (diff)
add loadidx instructionloadidx
-rw-r--r--orc/orcbytecode.c1
-rw-r--r--orc/orcbytecodes.h6
-rw-r--r--orc/orccompiler.c1
-rw-r--r--orc/orcemulateopcodes.c92
-rw-r--r--orc/orcemulateopcodes.h4
-rw-r--r--orc/orcopcodes.c5
-rw-r--r--orc/orcprogram-c.c21
-rw-r--r--orc/orcrules-sse.c72
-rw-r--r--orc/orcx86insn.c2
-rw-r--r--orc/orcx86insn.h4
10 files changed, 207 insertions, 1 deletions
diff --git a/orc/orcbytecode.c b/orc/orcbytecode.c
index 8e7bad4..d780abe 100644
--- a/orc/orcbytecode.c
+++ b/orc/orcbytecode.c
@@ -492,6 +492,7 @@ orc_bytecode_parse_function (OrcProgram *program, const orc_uint8 *bytecode)
insn = program->insns + program->n_insns;
insn->opcode = opcode_set->opcodes + (bc - 32);
+ ORC_DEBUG ("%s", insn->opcode->name);
if (insn->opcode->dest_size[0] != 0) {
insn->dest_args[0] = orc_bytecode_parse_get_int (parse);
}
diff --git a/orc/orcbytecodes.h b/orc/orcbytecodes.h
index cea1ff4..de0355d 100644
--- a/orc/orcbytecodes.h
+++ b/orc/orcbytecodes.h
@@ -250,6 +250,10 @@ typedef enum {
ORC_BC_convld,
ORC_BC_convfd,
ORC_BC_convdf,
- /* 226 */
+ ORC_BC_loadidxb,
+ ORC_BC_loadidxw,
+ ORC_BC_loadidxl,
+ ORC_BC_loadidxq,
+ /* 230 */
ORC_BC_LAST
} OrcBytecodes;
diff --git a/orc/orccompiler.c b/orc/orccompiler.c
index 3618f5b..9f18b29 100644
--- a/orc/orccompiler.c
+++ b/orc/orccompiler.c
@@ -383,6 +383,7 @@ orc_program_compile_full (OrcProgram *program, OrcTarget *target,
if (compiler->output_insns) free (compiler->output_insns);
free (compiler);
ORC_INFO("finished compiling (success)");
+ ORC_INFO("%s", orc_program_get_asm_code (program));
return result;
error:
diff --git a/orc/orcemulateopcodes.c b/orc/orcemulateopcodes.c
index e80fce2..74548df 100644
--- a/orc/orcemulateopcodes.c
+++ b/orc/orcemulateopcodes.c
@@ -5371,3 +5371,95 @@ emulate_convdf (OrcOpcodeExecutor *ex, int offset, int n)
}
+/*
+ * Emulation of the "loadidxb" opcode.
+ *
+ * For each of the n elements, reads an index from the second source
+ * (orc_union32 entries, starting at entry `offset`, through the .i member)
+ * and copies the orc_int8 found at that position of the first source to
+ * the destination.  The destination is written from its first element.
+ */
+void
+emulate_loadidxb (OrcOpcodeExecutor *ex, int offset, int n)
+{
+  orc_int8 * ORC_RESTRICT out = (orc_int8 *)ex->dest_ptrs[0];
+  const orc_int8 * ORC_RESTRICT table = (orc_int8 *)ex->src_ptrs[0];
+  const orc_union32 * ORC_RESTRICT indices = (orc_union32 *)ex->src_ptrs[1];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    /* gather: the k-th index of this chunk selects the source element */
+    const orc_int8 picked = table[indices[offset + k].i];
+
+    out[k] = picked;
+  }
+}
+
+/*
+ * Emulation of the "loadidxw" opcode.
+ *
+ * For each of the n elements, reads an index from the second source
+ * (orc_union32 entries, starting at entry `offset`, through the .i member)
+ * and copies the orc_union16 found at that position of the first source to
+ * the destination.  The destination is written from its first element.
+ */
+void
+emulate_loadidxw (OrcOpcodeExecutor *ex, int offset, int n)
+{
+  orc_union16 * ORC_RESTRICT out = (orc_union16 *)ex->dest_ptrs[0];
+  const orc_union16 * ORC_RESTRICT table = (orc_union16 *)ex->src_ptrs[0];
+  const orc_union32 * ORC_RESTRICT indices = (orc_union32 *)ex->src_ptrs[1];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    /* gather: the k-th index of this chunk selects the source element */
+    const orc_union16 picked = table[indices[offset + k].i];
+
+    out[k] = picked;
+  }
+}
+
+/*
+ * Emulation of the "loadidxl" opcode.
+ *
+ * For each of the n elements, reads an index from the second source
+ * (orc_union32 entries, starting at entry `offset`, through the .i member)
+ * and copies the orc_union32 found at that position of the first source to
+ * the destination.  The destination is written from its first element.
+ */
+void
+emulate_loadidxl (OrcOpcodeExecutor *ex, int offset, int n)
+{
+  orc_union32 * ORC_RESTRICT out = (orc_union32 *)ex->dest_ptrs[0];
+  const orc_union32 * ORC_RESTRICT table = (orc_union32 *)ex->src_ptrs[0];
+  const orc_union32 * ORC_RESTRICT indices = (orc_union32 *)ex->src_ptrs[1];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    /* gather: the k-th index of this chunk selects the source element */
+    const orc_union32 picked = table[indices[offset + k].i];
+
+    out[k] = picked;
+  }
+}
+
+/*
+ * Emulation of the "loadidxq" opcode.
+ *
+ * For each of the n elements, reads an index from the second source
+ * (orc_union32 entries, starting at entry `offset`, through the .i member)
+ * and copies the orc_union64 found at that position of the first source to
+ * the destination.  The destination is written from its first element.
+ */
+void
+emulate_loadidxq (OrcOpcodeExecutor *ex, int offset, int n)
+{
+  orc_union64 * ORC_RESTRICT out = (orc_union64 *)ex->dest_ptrs[0];
+  const orc_union64 * ORC_RESTRICT table = (orc_union64 *)ex->src_ptrs[0];
+  const orc_union32 * ORC_RESTRICT indices = (orc_union32 *)ex->src_ptrs[1];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    /* gather: the k-th index of this chunk selects the source element */
+    const orc_union64 picked = table[indices[offset + k].i];
+
+    out[k] = picked;
+  }
+}
+
diff --git a/orc/orcemulateopcodes.h b/orc/orcemulateopcodes.h
index b02050e..8e19a17 100644
--- a/orc/orcemulateopcodes.h
+++ b/orc/orcemulateopcodes.h
@@ -198,6 +198,10 @@ void emulate_convdl (OrcOpcodeExecutor *ex, int i, int n);
void emulate_convld (OrcOpcodeExecutor *ex, int i, int n);
void emulate_convfd (OrcOpcodeExecutor *ex, int i, int n);
void emulate_convdf (OrcOpcodeExecutor *ex, int i, int n);
+void emulate_loadidxb (OrcOpcodeExecutor *ex, int i, int n);
+void emulate_loadidxw (OrcOpcodeExecutor *ex, int i, int n);
+void emulate_loadidxl (OrcOpcodeExecutor *ex, int i, int n);
+void emulate_loadidxq (OrcOpcodeExecutor *ex, int i, int n);
#endif
diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c
index 820f0c9..b87f3e3 100644
--- a/orc/orcopcodes.c
+++ b/orc/orcopcodes.c
@@ -499,6 +499,11 @@ static OrcStaticOpcode opcodes[] = {
{ "convfd", ORC_STATIC_OPCODE_FLOAT, { 8 }, { 4 }, emulate_convfd },
{ "convdf", ORC_STATIC_OPCODE_FLOAT, { 4 }, { 8 }, emulate_convdf },
+ { "loadidxb", ORC_STATIC_OPCODE_LOAD, { 1 }, { 1, 4 }, emulate_loadidxb },
+ { "loadidxw", ORC_STATIC_OPCODE_LOAD, { 2 }, { 2, 4 }, emulate_loadidxw },
+ { "loadidxl", ORC_STATIC_OPCODE_LOAD, { 4 }, { 4, 4 }, emulate_loadidxl },
+ { "loadidxq", ORC_STATIC_OPCODE_LOAD, { 8 }, { 8, 4 }, emulate_loadidxq },
+
{ "" }
};
diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c
index bff075a..1936c9b 100644
--- a/orc/orcprogram-c.c
+++ b/orc/orcprogram-c.c
@@ -883,6 +883,23 @@ c_rule_loadoffX (OrcCompiler *p, void *user, OrcInstruction *insn)
}
+/*
+ * C backend rule shared by loadidxb/w/l/q.
+ *
+ * Emits one C statement that assigns to the destination variable the
+ * element of the first source pointer selected by the index read from the
+ * second source pointer.  When generating a stand-alone opcode
+ * (ORC_TARGET_C_OPCODE set and the instruction not flagged
+ * ORC_INSN_FLAG_ADDED) the index is read at "offset + i", otherwise at "i".
+ */
static void
+c_rule_loadidxX (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  char dest[40];
+
+  /* NOTE(review): the name written into `dest` is not used by the emitted
+   * code below; the call is kept in case c_get_name_int has side effects
+   * that are not visible from here -- confirm and drop if it has none. */
+  c_get_name_int (dest, p, insn, insn->dest_args[0]);
+
+  if ((p->target_flags & ORC_TARGET_C_OPCODE) &&
+      !(insn->flags & ORC_INSN_FLAG_ADDED)) {
+    ORC_ASM_CODE(p," var%d = ptr%d[ptr%d[offset + i].i];\n", insn->dest_args[0],
+        insn->src_args[0], insn->src_args[1]);
+  } else {
+    ORC_ASM_CODE(p," var%d = ptr%d[ptr%d[i].i];\n", insn->dest_args[0],
+        insn->src_args[0], insn->src_args[1]);
+  }
+}
+
+static void
c_rule_loadupdb (OrcCompiler *p, void *user, OrcInstruction *insn)
{
if (p->target_flags & ORC_TARGET_C_OPCODE &&
@@ -1558,6 +1575,10 @@ orc_c_init (void)
orc_rule_register (rule_set, "loadoffl", c_rule_loadoffX, NULL);
orc_rule_register (rule_set, "loadupdb", c_rule_loadupdb, NULL);
orc_rule_register (rule_set, "loadupib", c_rule_loadupib, NULL);
+ orc_rule_register (rule_set, "loadidxb", c_rule_loadidxX, NULL);
+ orc_rule_register (rule_set, "loadidxw", c_rule_loadidxX, NULL);
+ orc_rule_register (rule_set, "loadidxl", c_rule_loadidxX, NULL);
+ orc_rule_register (rule_set, "loadidxq", c_rule_loadidxX, NULL);
orc_rule_register (rule_set, "ldresnearb", c_rule_ldresnearX, NULL);
orc_rule_register (rule_set, "ldresnearl", c_rule_ldresnearX, NULL);
orc_rule_register (rule_set, "ldreslinb", c_rule_ldreslinb, NULL);
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index c2c13d5..367ecd8 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -667,6 +667,73 @@ mmx_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn)
#endif
+/*
+ * SSE code generation rule shared by loadidxb/w/l/q.
+ *
+ * `user` carries the element size in bytes (1, 2, 4 or 8), as passed to
+ * orc_rule_register.  For each of the (1 << loop_shift) lanes the rule
+ * emits a 4-byte load of the lane's index from the index array
+ * (src_args[1]) into gp_tmpreg, followed by an indexed load of the
+ * selected element of src_args[0] into the destination register.
+ */
static void
+sse_rule_loadidxX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{
+  OrcVariable *src = compiler->vars + insn->src_args[0];
+  OrcVariable *idx = compiler->vars + insn->src_args[1];
+  OrcVariable *dest = compiler->vars + insn->dest_args[0];
+  int tmp = orc_compiler_get_temp_reg (compiler);
+  int ptr_reg, i;
+  int size = ORC_PTR_TO_INT(user);
+  /* Byte offset into the index array.  Index entries are 4 bytes wide
+   * (4-byte load with a 4 * i stride below), so the element offset is
+   * scaled by 4 and not by the size of the looked-up element. */
+  int offset = compiler->offset * 4;
+
+  if (idx->ptr_register == 0) {
+    /* no register holds the index array pointer: fetch it from the
+     * executor's arrays[] slot for this variable */
+    i = insn->src_args[1];
+    orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4,
+        (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]),
+        compiler->exec_reg, compiler->gp_tmpreg);
+    /* NOTE(review): ptr_reg aliases gp_tmpreg here, and the loop below
+     * overwrites gp_tmpreg with the lane index, so lanes after the first
+     * look like they read through a clobbered pointer -- confirm. */
+    ptr_reg = compiler->gp_tmpreg;
+  } else {
+    ptr_reg = idx->ptr_register;
+  }
+
+  for (i = 0; i < (1 << compiler->loop_shift); i++) {
+    /* gp_tmpreg <- index for lane i */
+    orc_x86_emit_mov_memoffset_reg (compiler, 4, 4 * i + offset, ptr_reg,
+        compiler->gp_tmpreg);
+    if (compiler->target_flags & ORC_TARGET_SSE_SSE4_1) {
+      /* the shift argument scales the index by the element size:
+       * 0, 1, 2, 3 for 1, 2, 4, 8 byte elements */
+      switch (size) {
+        case 1:
+          orc_sse_emit_pinsrb_memindex (compiler, i, 0,
+              src->ptr_register, compiler->gp_tmpreg, 0, dest->alloc);
+          break;
+        case 2:
+          orc_sse_emit_pinsrw_memindex (compiler, i, 0,
+              src->ptr_register, compiler->gp_tmpreg, 1, dest->alloc);
+          break;
+        case 4:
+          orc_sse_emit_pinsrd_memindex (compiler, i, 0,
+              src->ptr_register, compiler->gp_tmpreg, 2, dest->alloc);
+          break;
+        case 8:
+          /* an 8-byte element is inserted as two 32-bit halves; the index
+           * must be scaled by 8 (shift 3) to address 8-byte elements */
+          orc_sse_emit_pinsrd_memindex (compiler, 2 * i, 0,
+              src->ptr_register, compiler->gp_tmpreg, 3, dest->alloc);
+          orc_sse_emit_pinsrd_memindex (compiler, 2 * i + 1, 4,
+              src->ptr_register, compiler->gp_tmpreg, 3, dest->alloc);
+          break;
+        default:
+          /* only sizes 1, 2, 4 and 8 are registered for this rule */
+          break;
+      }
+    } else {
+      /* NOTE(review): this fallback always loads 4 bytes with the index
+       * scaled by 4 and shifts by 4 bytes per lane, which matches only
+       * the 4-byte variant -- confirm behavior for sizes 1, 2 and 8. */
+      if (i == 0) {
+        orc_x86_emit_mov_memindex_sse (compiler, 4, 0,
+            src->ptr_register, compiler->gp_tmpreg, 2, dest->alloc, FALSE);
+      } else {
+        orc_x86_emit_mov_memindex_sse (compiler, 4, 0,
+            src->ptr_register, compiler->gp_tmpreg, 2, tmp, FALSE);
+#ifdef MMX
+        orc_sse_emit_psllq_imm (compiler, 8*4*i, tmp);
+        orc_sse_emit_por (compiler, tmp, dest->alloc);
+#else
+        orc_sse_emit_pslldq_imm (compiler, 4*i, tmp);
+        orc_sse_emit_por (compiler, tmp, dest->alloc);
+#endif
+      }
+    }
+  }
+  idx->update_type = 2;
+}
+
+
+static void
sse_rule_copyx (OrcCompiler *p, void *user, OrcInstruction *insn)
{
if (p->vars[insn->src_args[0]].alloc == p->vars[insn->dest_args[0]].alloc) {
@@ -2908,6 +2975,11 @@ orc_compiler_sse_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "convdf", sse_rule_convdf, NULL);
#endif
+ orc_rule_register (rule_set, "loadidxb", sse_rule_loadidxX, (void *)1);
+ orc_rule_register (rule_set, "loadidxw", sse_rule_loadidxX, (void *)2);
+ orc_rule_register (rule_set, "loadidxl", sse_rule_loadidxX, (void *)4);
+ orc_rule_register (rule_set, "loadidxq", sse_rule_loadidxX, (void *)8);
+
/* slow rules */
orc_rule_register (rule_set, "maxuw", sse_rule_maxuw_slow, NULL);
orc_rule_register (rule_set, "minuw", sse_rule_minuw_slow, NULL);
diff --git a/orc/orcx86insn.c b/orc/orcx86insn.c
index 4675a99..2109ba7 100644
--- a/orc/orcx86insn.c
+++ b/orc/orcx86insn.c
@@ -156,7 +156,9 @@ static const OrcSysOpcode orc_x86_opcodes[] = {
{ "pshuflw", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0xf2, 0x0f70 },
{ "pshufhw", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0xf3, 0x0f70 },
{ "palignr", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0x66, 0x0f3a0f },
+ { "pinsrb", ORC_X86_INSN_TYPE_IMM8_REGM_MMX, 0, 0x01, 0x0f3a20 },
{ "pinsrw", ORC_X86_INSN_TYPE_IMM8_REGM_MMX, 0, 0x01, 0x0fc4 },
+ { "pinsrd", ORC_X86_INSN_TYPE_IMM8_REGM_MMX, 0, 0x01, 0x0f3a22 },
{ "movd", ORC_X86_INSN_TYPE_REGM_MMX, 0, 0x01, 0x0f6e },
{ "movq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0xf3, 0x0f7e },
{ "movdqa", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f6f },
diff --git a/orc/orcx86insn.h b/orc/orcx86insn.h
index d109c95..354adf5 100644
--- a/orc/orcx86insn.h
+++ b/orc/orcx86insn.h
@@ -179,7 +179,9 @@ typedef enum {
ORC_X86_pshuflw,
ORC_X86_pshufhw,
ORC_X86_palignr,
+ ORC_X86_pinsrb,
ORC_X86_pinsrw,
+ ORC_X86_pinsrd,
ORC_X86_movd_load,
ORC_X86_movq_sse_load,
ORC_X86_movdqa_load,
@@ -465,7 +467,9 @@ void orc_x86_calculate_offsets (OrcCompiler *p);
#define orc_sse_emit_movdqu_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movdqu_store, 16, 0, a, offset, b)
#define orc_sse_emit_movntdq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movntdq_store, 16, 0, a, offset, b)
+#define orc_sse_emit_pinsrb_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrb, 4, imm, offset, a, a_index, shift, b)
#define orc_sse_emit_pinsrw_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrw, 4, imm, offset, a, a_index, shift, b)
+#define orc_sse_emit_pinsrd_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrd, 4, imm, offset, a, a_index, shift, b)
#define orc_sse_emit_movd_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movd_load, 4, 0, offset, a, a_index, shift, b)
#define orc_sse_emit_movq_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movq_sse_load, 4, 0, offset, a, a_index, shift, b)
#define orc_sse_emit_movdqa_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movdqa_load, 4, 0, offset, a, a_index, shift, b)