summary refs log tree commit diff
diff options
context:
space:
mode:
author David Schleef <ds@schleef.org> 2010-09-15 17:50:44 -0700
committer David Schleef <ds@schleef.org> 2010-09-15 17:50:44 -0700
commit d7ce7adbd21c84d8a4d233d3e6010bffe2f6efa3 (patch)
tree ec73248fff322d407d05d1e2d75646ddf5b533bb
parent 68259b8d7bd4b706d75ecdd291148e884dfc282e (diff)
sse: implement 64-bit params and constants
-rw-r--r-- orc/orcprogram-sse.c 30
-rw-r--r-- orc/orcrules-sse.c 69
-rw-r--r-- orc/orcsse.c 13
-rw-r--r-- orc/orcsse.h 5
4 files changed, 92 insertions, 25 deletions
diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c
index 89b3bbd..f4d85cb 100644
--- a/orc/orcprogram-sse.c
+++ b/orc/orcprogram-sse.c
@@ -341,8 +341,36 @@ sse_save_accumulators (OrcCompiler *compiler)
void
sse_load_constant (OrcCompiler *compiler, int reg, int size, int value)
{
+ orc_sse_load_constant (compiler, reg, size, value);
+}
+
+void
+orc_sse_load_constant (OrcCompiler *compiler, int reg, int size, orc_uint64 value)
+{
int i;
+ if (size == 8) {
+ int offset = ORC_STRUCT_OFFSET(OrcExecutor,arrays[ORC_VAR_T1]);
+
+ /* FIXME how ugly and slow! */
+ orc_x86_emit_mov_imm_reg (compiler, 4, value>>0,
+ compiler->gp_tmpreg);
+ orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg,
+ offset + 0, compiler->exec_reg);
+
+ orc_x86_emit_mov_imm_reg (compiler, 4, value>>32,
+ compiler->gp_tmpreg);
+ orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg,
+ offset + 4, compiler->exec_reg);
+
+ orc_x86_emit_mov_memoffset_sse (compiler, 8, offset, compiler->exec_reg,
+ reg, FALSE);
+#ifndef MMX
+ orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,0,1,0), reg, reg);
+#endif
+ return;
+ }
+
if (size == 1) {
value &= 0xff;
value |= (value << 8);
@@ -353,7 +381,7 @@ sse_load_constant (OrcCompiler *compiler, int reg, int size, int value)
value |= (value << 16);
}
- ORC_ASM_CODE(compiler, "# loading constant %d 0x%08x\n", value, value);
+ ORC_ASM_CODE(compiler, "# loading constant %d 0x%08x\n", (int)value, (int)value);
if (value == 0) {
orc_sse_emit_pxor(compiler, reg, reg);
return;
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index ef0830c..04a3e31 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -28,32 +28,55 @@ sse_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
reg = dest->alloc;
if (size == 8 && src->size == 8) {
- ORC_COMPILER_ERROR(compiler,"64-bit parameters not implemented");
- }
-
- orc_x86_emit_mov_memoffset_sse (compiler, 4,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]),
- compiler->exec_reg, reg, FALSE);
- if (size == 1) {
- orc_sse_emit_punpcklbw (compiler, reg, reg);
- }
+ orc_x86_emit_mov_memoffset_sse (compiler, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]),
+ compiler->exec_reg, reg, FALSE);
+ if (0) {
+ /* FIXME yes, I understand this is terrible */
+ orc_sse_emit_pinsrw_memoffset (compiler, 2,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,
+ params[insn->src_args[0] + (ORC_VAR_T1 - ORC_VAR_P1)]) + 0,
+ compiler->exec_reg, reg);
+ orc_sse_emit_pinsrw_memoffset (compiler, 3,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,
+ params[insn->src_args[0] + (ORC_VAR_T1 - ORC_VAR_P1)]) + 1,
+ compiler->exec_reg, reg);
+ orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,0,1,0), reg, reg);
+ } else {
+ orc_x86_emit_movhps_memoffset_sse (compiler,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,
+ params[insn->src_args[0] + (ORC_VAR_T1 - ORC_VAR_P1)]),
+ compiler->exec_reg, reg);
+ orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(2,0,2,0), reg, reg);
+ }
+ } else {
+ orc_x86_emit_mov_memoffset_sse (compiler, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]),
+ compiler->exec_reg, reg, FALSE);
+ if (size < 8) {
+ if (size == 1) {
+ orc_sse_emit_punpcklbw (compiler, reg, reg);
+ }
#ifndef MMX
- if (size <= 2) {
- orc_sse_emit_pshuflw (compiler, 0, reg, reg);
- }
- orc_sse_emit_pshufd (compiler, 0, reg, reg);
+ if (size <= 2) {
+ orc_sse_emit_pshuflw (compiler, 0, reg, reg);
+ }
+ orc_sse_emit_pshufd (compiler, 0, reg, reg);
#else
- if (size <= 2) {
- orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), reg, reg);
- } else {
- orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(1,0,1,0), reg, reg);
- }
+ if (size <= 2) {
+ orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), reg, reg);
+ } else {
+ orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(1,0,1,0), reg, reg);
+ }
#endif
- } else if (src->vartype == ORC_VAR_TYPE_CONST) {
- if (size == 8 && src->size == 8) {
- ORC_COMPILER_ERROR(compiler,"64-bit constants not implemented");
+ } else {
+#ifndef MMX
+ orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,0,1,0), reg, reg);
+#endif
+ }
}
- sse_load_constant (compiler, dest->alloc, size, src->value.i);
+ } else if (src->vartype == ORC_VAR_TYPE_CONST) {
+ orc_sse_load_constant (compiler, dest->alloc, size, src->value.i);
} else {
ORC_ASSERT(0);
}
@@ -1200,7 +1223,7 @@ sse_rule_divluw (OrcCompiler *p, void *user, OrcInstruction *insn)
orc_sse_emit_psllw (p, 8, divisor);
orc_sse_emit_psrlw (p, 1, divisor);
- sse_load_constant (p, a, 2, 0x00ff);
+ orc_sse_load_constant (p, a, 2, 0x00ff);
tmp = orc_compiler_get_constant (p, 2, 0x8000);
orc_sse_emit_movdqa (p, tmp, j);
orc_sse_emit_psrlw (p, 8, j);
diff --git a/orc/orcsse.c b/orc/orcsse.c
index f812075..6e932e4 100644
--- a/orc/orcsse.c
+++ b/orc/orcsse.c
@@ -244,6 +244,19 @@ orc_x86_emit_mov_memoffset_sse (OrcCompiler *compiler, int size, int offset,
}
void
+orc_x86_emit_movhps_memoffset_sse (OrcCompiler *compiler, int offset,
+ int reg1, int reg2)
+{
+ ORC_ASM_CODE(compiler," movhps %d(%%%s), %%%s\n", offset,
+ orc_x86_get_regname_ptr(compiler, reg1),
+ orc_x86_get_regname_sse(reg2));
+ orc_x86_emit_rex(compiler, 0, reg2, 0, reg1);
+ *compiler->codeptr++ = 0x0f;
+ *compiler->codeptr++ = 0x16;
+ orc_x86_emit_modrm_memoffset (compiler, reg2, offset, reg1);
+}
+
+void
orc_x86_emit_mov_memindex_sse (OrcCompiler *compiler, int size, int offset,
int reg1, int regindex, int shift, int reg2, int is_aligned)
{
diff --git a/orc/orcsse.h b/orc/orcsse.h
index e662824..6b58e7f 100644
--- a/orc/orcsse.h
+++ b/orc/orcsse.h
@@ -41,6 +41,8 @@ typedef enum {
const char * orc_x86_get_regname_sse(int i);
void orc_x86_emit_mov_memoffset_sse (OrcCompiler *compiler, int size, int offset,
int reg1, int reg2, int is_aligned);
+void orc_x86_emit_movhps_memoffset_sse (OrcCompiler *compiler, int offset,
+ int reg1, int reg2);
void orc_x86_emit_mov_memindex_sse (OrcCompiler *compiler, int size, int offset,
int reg1, int regindex, int shift, int reg2, int is_aligned);
void orc_x86_emit_mov_sse_memoffset (OrcCompiler *compiler, int size, int reg1, int offset,
@@ -76,7 +78,8 @@ void orc_sse_emit_shiftimm (OrcCompiler *p, const char *insn_name,
void orc_sse_set_mxcsr (OrcCompiler *compiler);
void orc_sse_restore_mxcsr (OrcCompiler *compiler);
-void sse_load_constant (OrcCompiler *compiler, int reg, int size, int value);
+void orc_sse_load_constant (OrcCompiler *compiler, int reg, int size,
+ orc_uint64 value);
unsigned int orc_sse_get_cpu_flags (void);