diff options
-rw-r--r-- | orc/orcbytecode.c | 1 | ||||
-rw-r--r-- | orc/orcbytecodes.h | 6 | ||||
-rw-r--r-- | orc/orccompiler.c | 1 | ||||
-rw-r--r-- | orc/orcemulateopcodes.c | 92 | ||||
-rw-r--r-- | orc/orcemulateopcodes.h | 4 | ||||
-rw-r--r-- | orc/orcopcodes.c | 5 | ||||
-rw-r--r-- | orc/orcprogram-c.c | 21 | ||||
-rw-r--r-- | orc/orcrules-sse.c | 72 | ||||
-rw-r--r-- | orc/orcx86insn.c | 2 | ||||
-rw-r--r-- | orc/orcx86insn.h | 4 |
10 files changed, 207 insertions, 1 deletions
diff --git a/orc/orcbytecode.c b/orc/orcbytecode.c index 8e7bad4..d780abe 100644 --- a/orc/orcbytecode.c +++ b/orc/orcbytecode.c @@ -492,6 +492,7 @@ orc_bytecode_parse_function (OrcProgram *program, const orc_uint8 *bytecode) insn = program->insns + program->n_insns; insn->opcode = opcode_set->opcodes + (bc - 32); + ORC_DEBUG ("%s", insn->opcode->name); if (insn->opcode->dest_size[0] != 0) { insn->dest_args[0] = orc_bytecode_parse_get_int (parse); } diff --git a/orc/orcbytecodes.h b/orc/orcbytecodes.h index cea1ff4..de0355d 100644 --- a/orc/orcbytecodes.h +++ b/orc/orcbytecodes.h @@ -250,6 +250,10 @@ typedef enum { ORC_BC_convld, ORC_BC_convfd, ORC_BC_convdf, - /* 226 */ + ORC_BC_loadidxb, + ORC_BC_loadidxw, + ORC_BC_loadidxl, + ORC_BC_loadidxq, + /* 230 */ ORC_BC_LAST } OrcBytecodes; diff --git a/orc/orccompiler.c b/orc/orccompiler.c index 3618f5b..9f18b29 100644 --- a/orc/orccompiler.c +++ b/orc/orccompiler.c @@ -383,6 +383,7 @@ orc_program_compile_full (OrcProgram *program, OrcTarget *target, if (compiler->output_insns) free (compiler->output_insns); free (compiler); ORC_INFO("finished compiling (success)"); + ORC_INFO("%s", orc_program_get_asm_code (program)); return result; error: diff --git a/orc/orcemulateopcodes.c b/orc/orcemulateopcodes.c index e80fce2..74548df 100644 --- a/orc/orcemulateopcodes.c +++ b/orc/orcemulateopcodes.c @@ -5371,3 +5371,95 @@ emulate_convdf (OrcOpcodeExecutor *ex, int offset, int n) } +void +emulate_loadidxb (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_int8 * ORC_RESTRICT ptr0; + const orc_int8 * ORC_RESTRICT ptr4; + const orc_union32 * ORC_RESTRICT ptr5; + orc_int8 var32; + + ptr0 = (orc_int8 *)ex->dest_ptrs[0]; + ptr4 = (orc_int8 *)ex->src_ptrs[0]; + ptr5 = (orc_union32 *)ex->src_ptrs[1]; + + + for (i = 0; i < n; i++) { + /* 0: loadidxb */ + var32 = ptr4[ptr5[offset + i].i]; + /* 1: storeb */ + ptr0[i] = var32; + } + +} + +void +emulate_loadidxw (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union16 * ORC_RESTRICT ptr0; + const orc_union16 * ORC_RESTRICT ptr4; + const orc_union32 * ORC_RESTRICT ptr5; + orc_union16 var32; + + ptr0 = (orc_union16 *)ex->dest_ptrs[0]; + ptr4 = (orc_union16 *)ex->src_ptrs[0]; + ptr5 = (orc_union32 *)ex->src_ptrs[1]; + + + for (i = 0; i < n; i++) { + /* 0: loadidxw */ + var32 = ptr4[ptr5[offset + i].i]; + /* 1: storew */ + ptr0[i] = var32; + } + +} + +void +emulate_loadidxl (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union32 * ORC_RESTRICT ptr0; + const orc_union32 * ORC_RESTRICT ptr4; + const orc_union32 * ORC_RESTRICT ptr5; + orc_union32 var32; + + ptr0 = (orc_union32 *)ex->dest_ptrs[0]; + ptr4 = (orc_union32 *)ex->src_ptrs[0]; + ptr5 = (orc_union32 *)ex->src_ptrs[1]; + + + for (i = 0; i < n; i++) { + /* 0: loadidxl */ + var32 = ptr4[ptr5[offset + i].i]; + /* 1: storel */ + ptr0[i] = var32; + } + +} + +void +emulate_loadidxq (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union64 * ORC_RESTRICT ptr0; + const orc_union64 * ORC_RESTRICT ptr4; + const orc_union32 * ORC_RESTRICT ptr5; + orc_union64 var32; + + ptr0 = (orc_union64 *)ex->dest_ptrs[0]; + ptr4 = (orc_union64 *)ex->src_ptrs[0]; + ptr5 = (orc_union32 *)ex->src_ptrs[1]; + + + for (i = 0; i < n; i++) { + /* 0: loadidxq */ + var32 = ptr4[ptr5[offset + i].i]; + /* 1: storeq */ + ptr0[i] = var32; + } + +} + diff --git a/orc/orcemulateopcodes.h b/orc/orcemulateopcodes.h index b02050e..8e19a17 100644 --- a/orc/orcemulateopcodes.h +++ b/orc/orcemulateopcodes.h @@ -198,6 +198,10 @@ void emulate_convdl (OrcOpcodeExecutor *ex, int i, int n); void emulate_convld (OrcOpcodeExecutor *ex, int i, int n); void emulate_convfd (OrcOpcodeExecutor *ex, int i, int n); void emulate_convdf (OrcOpcodeExecutor *ex, int i, int n); +void emulate_loadidxb (OrcOpcodeExecutor *ex, int i, int n); +void emulate_loadidxw (OrcOpcodeExecutor *ex, int i, int n); +void emulate_loadidxl (OrcOpcodeExecutor *ex, int i, int n); +void emulate_loadidxq (OrcOpcodeExecutor *ex, int i, int n); #endif diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c index 820f0c9..b87f3e3 100644 --- a/orc/orcopcodes.c +++ b/orc/orcopcodes.c @@ -499,6 +499,11 @@ static OrcStaticOpcode opcodes[] = { { "convfd", ORC_STATIC_OPCODE_FLOAT, { 8 }, { 4 }, emulate_convfd }, { "convdf", ORC_STATIC_OPCODE_FLOAT, { 4 }, { 8 }, emulate_convdf }, + { "loadidxb", ORC_STATIC_OPCODE_LOAD, { 1 }, { 1, 4 }, emulate_loadidxb }, + { "loadidxw", ORC_STATIC_OPCODE_LOAD, { 2 }, { 2, 4 }, emulate_loadidxw }, + { "loadidxl", ORC_STATIC_OPCODE_LOAD, { 4 }, { 4, 4 }, emulate_loadidxl }, + { "loadidxq", ORC_STATIC_OPCODE_LOAD, { 8 }, { 8, 4 }, emulate_loadidxq }, + { "" } }; diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c index bff075a..1936c9b 100644 --- a/orc/orcprogram-c.c +++ b/orc/orcprogram-c.c @@ -883,6 +883,23 @@ c_rule_loadoffX (OrcCompiler *p, void *user, OrcInstruction *insn) } static void +c_rule_loadidxX (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + char dest[40]; + + c_get_name_int (dest, p, insn, insn->dest_args[0]); \ + + if (p->target_flags & ORC_TARGET_C_OPCODE && + !(insn->flags & ORC_INSN_FLAG_ADDED)) { + ORC_ASM_CODE(p," var%d = ptr%d[ptr%d[offset + i].i];\n", insn->dest_args[0], + insn->src_args[0], insn->src_args[1]); + } else { + ORC_ASM_CODE(p," var%d = ptr%d[ptr%d[i].i];\n", insn->dest_args[0], + insn->src_args[0], insn->src_args[1]); + } +} + +static void c_rule_loadupdb (OrcCompiler *p, void *user, OrcInstruction *insn) { if (p->target_flags & ORC_TARGET_C_OPCODE && @@ -1558,6 +1575,10 @@ orc_c_init (void) orc_rule_register (rule_set, "loadoffl", c_rule_loadoffX, NULL); orc_rule_register (rule_set, "loadupdb", c_rule_loadupdb, NULL); orc_rule_register (rule_set, "loadupib", c_rule_loadupib, NULL); + orc_rule_register (rule_set, "loadidxb", c_rule_loadidxX, NULL); + orc_rule_register (rule_set, "loadidxw", c_rule_loadidxX, NULL); + orc_rule_register (rule_set, "loadidxl", c_rule_loadidxX, NULL); + orc_rule_register (rule_set, "loadidxq", c_rule_loadidxX, NULL); orc_rule_register (rule_set, "ldresnearb", c_rule_ldresnearX, NULL); orc_rule_register (rule_set, "ldresnearl", c_rule_ldresnearX, NULL); orc_rule_register (rule_set, "ldreslinb", c_rule_ldreslinb, NULL); diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index c2c13d5..367ecd8 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -667,6 +667,73 @@ mmx_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn) #endif static void +sse_rule_loadidxX (OrcCompiler *compiler, void *user, OrcInstruction *insn) +{ + OrcVariable *src = compiler->vars + insn->src_args[0]; + OrcVariable *idx = compiler->vars + insn->src_args[1]; + OrcVariable *dest = compiler->vars + insn->dest_args[0]; + int tmp = orc_compiler_get_temp_reg (compiler); + int ptr_reg, i; + int offset = 0; + int size = ORC_PTR_TO_INT(user); + + offset = compiler->offset * src->size; + if (idx->ptr_register == 0) { + i = insn->src_args[1]; + orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4, + (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]), + compiler->exec_reg, compiler->gp_tmpreg); + ptr_reg = compiler->gp_tmpreg; + } else { + ptr_reg = idx->ptr_register; + } + + for(i=0;i<(1<<compiler->loop_shift);i++){ + orc_x86_emit_mov_memoffset_reg (compiler, 4, 4 * i + offset, ptr_reg, + compiler->gp_tmpreg); + if (compiler->target_flags & ORC_TARGET_SSE_SSE4_1) { + switch (size) { + case 1: + orc_sse_emit_pinsrb_memindex (compiler, i, 0, + src->ptr_register, compiler->gp_tmpreg, 0, dest->alloc); + break; + case 2: + orc_sse_emit_pinsrw_memindex (compiler, i, 0, + src->ptr_register, compiler->gp_tmpreg, 1, dest->alloc); + break; + case 4: + orc_sse_emit_pinsrd_memindex (compiler, i, 0, + src->ptr_register, compiler->gp_tmpreg, 2, dest->alloc); + break; + case 8: + orc_sse_emit_pinsrd_memindex (compiler, 2 * i, 0, + src->ptr_register, compiler->gp_tmpreg, 2, dest->alloc); + orc_sse_emit_pinsrd_memindex (compiler, 2 * i + 1, 4, + src->ptr_register, compiler->gp_tmpreg, 2, dest->alloc); + break; + } + } else { + if (i == 0) { + orc_x86_emit_mov_memindex_sse (compiler, 4, 0, + src->ptr_register, compiler->gp_tmpreg, 2, dest->alloc, FALSE); + } else { + orc_x86_emit_mov_memindex_sse (compiler, 4, 0, + src->ptr_register, compiler->gp_tmpreg, 2, tmp, FALSE); +#ifdef MMX + orc_sse_emit_psllq_imm (compiler, 8*4*i, tmp); + orc_sse_emit_por (compiler, tmp, dest->alloc); +#else + orc_sse_emit_pslldq_imm (compiler, 4*i, tmp); + orc_sse_emit_por (compiler, tmp, dest->alloc); +#endif + } + } + } + idx->update_type = 2; +} + + +static void sse_rule_copyx (OrcCompiler *p, void *user, OrcInstruction *insn) { if (p->vars[insn->src_args[0]].alloc == p->vars[insn->dest_args[0]].alloc) { @@ -2908,6 +2975,11 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, "convdf", sse_rule_convdf, NULL); #endif + orc_rule_register (rule_set, "loadidxb", sse_rule_loadidxX, (void *)1); + orc_rule_register (rule_set, "loadidxw", sse_rule_loadidxX, (void *)2); + orc_rule_register (rule_set, "loadidxl", sse_rule_loadidxX, (void *)4); + orc_rule_register (rule_set, "loadidxq", sse_rule_loadidxX, (void *)8); + /* slow rules */ orc_rule_register (rule_set, "maxuw", sse_rule_maxuw_slow, NULL); orc_rule_register (rule_set, "minuw", sse_rule_minuw_slow, NULL); diff --git a/orc/orcx86insn.c b/orc/orcx86insn.c index 4675a99..2109ba7 100644 --- a/orc/orcx86insn.c +++ b/orc/orcx86insn.c @@ -156,7 +156,9 @@ static const OrcSysOpcode orc_x86_opcodes[] = { { "pshuflw", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0xf2, 0x0f70 }, { "pshufhw", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0xf3, 0x0f70 }, { "palignr", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0x66, 0x0f3a0f }, + { "pinsrb", ORC_X86_INSN_TYPE_IMM8_REGM_MMX, 0, 0x01, 0x0f3a20 }, { "pinsrw", ORC_X86_INSN_TYPE_IMM8_REGM_MMX, 0, 0x01, 0x0fc4 }, + { "pinsrd", ORC_X86_INSN_TYPE_IMM8_REGM_MMX, 0, 0x01, 0x0f3a22 }, { "movd", ORC_X86_INSN_TYPE_REGM_MMX, 0, 0x01, 0x0f6e }, { "movq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0xf3, 0x0f7e }, { "movdqa", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f6f }, diff --git a/orc/orcx86insn.h b/orc/orcx86insn.h index d109c95..354adf5 100644 --- a/orc/orcx86insn.h +++ b/orc/orcx86insn.h @@ -179,7 +179,9 @@ typedef enum { ORC_X86_pshuflw, ORC_X86_pshufhw, ORC_X86_palignr, + ORC_X86_pinsrb, ORC_X86_pinsrw, + ORC_X86_pinsrd, ORC_X86_movd_load, ORC_X86_movq_sse_load, ORC_X86_movdqa_load, @@ -465,7 +467,9 @@ void orc_x86_calculate_offsets (OrcCompiler *p); #define orc_sse_emit_movdqu_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movdqu_store, 16, 0, a, offset, b) #define orc_sse_emit_movntdq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movntdq_store, 16, 0, a, offset, b) +#define orc_sse_emit_pinsrb_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrb, 4, imm, offset, a, a_index, shift, b) #define orc_sse_emit_pinsrw_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrw, 4, imm, offset, a, a_index, shift, b) +#define orc_sse_emit_pinsrd_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrd, 4, imm, offset, a, a_index, shift, b) #define orc_sse_emit_movd_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movd_load, 4, 0, offset, a, a_index, shift, b) #define orc_sse_emit_movq_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movq_sse_load, 4, 0, offset, a, a_index, shift, b) #define orc_sse_emit_movdqa_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movdqa_load, 4, 0, offset, a, a_index, shift, b) |