summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Wim Taymans <wtaymans@redhat.com> 2014-12-09 16:54:19 +0100
committer: Wim Taymans <wtaymans@redhat.com> 2014-12-09 18:01:37 +0100
commit: 2a187f7f1bc91a5ba7dbc608f7c0a98016e45651 (patch)
tree: bff31cc5999a2517a30367e9118f10cfc62615cb
parent: f1cfa5bba9824374d769e312381d8f5d85a417bc (diff)
neon: vtrn and vzip are undefined when src == dest
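Make sure vtrn and vzip are always emitted with different source and destination registers, because the result of these instructions is undefined when both operands are the same register. When the second source is allocated to the same register as the destination, copy it to the temporary register first and use that copy as the operand.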
-rw-r--r-- orc/orcrules-neon.c 15
1 file changed, 10 insertions, 5 deletions
diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c
index 9959160..83b33d3 100644
--- a/orc/orcrules-neon.c
+++ b/orc/orcrules-neon.c
@@ -2062,7 +2062,8 @@ orc_neon_rule_mergebw (OrcCompiler *p, void *user, OrcInstruction *insn)
p->vars[insn->src_args[0]].alloc);
}
- if (p->vars[insn->src_args[1]].last_use != p->insn_index) {
+ if (p->vars[insn->src_args[1]].last_use != p->insn_index ||
+ p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) {
orc_neon_emit_mov (p, p->tmpreg, p->vars[insn->src_args[1]].alloc);
orc_neon_emit_unary (p, "vzip.8", 0xf3b20180,
p->vars[insn->dest_args[0]].alloc,
@@ -2094,7 +2095,8 @@ orc_neon_rule_mergewl (OrcCompiler *p, void *user, OrcInstruction *insn)
p->vars[insn->src_args[0]].alloc);
}
- if (p->vars[insn->src_args[1]].last_use != p->insn_index) {
+ if (p->vars[insn->src_args[1]].last_use != p->insn_index ||
+ p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) {
orc_neon_emit_mov (p, p->tmpreg, p->vars[insn->src_args[1]].alloc);
orc_neon_emit_unary (p, "vzip.16", 0xf3b60180,
p->vars[insn->dest_args[0]].alloc,
@@ -2110,7 +2112,8 @@ orc_neon_rule_mergewl (OrcCompiler *p, void *user, OrcInstruction *insn)
p->vars[insn->src_args[0]].alloc);
}
- if (p->vars[insn->src_args[1]].last_use != p->insn_index) {
+ if (p->vars[insn->src_args[1]].last_use != p->insn_index ||
+ p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) {
orc_neon_emit_mov_quad (p, p->tmpreg, p->vars[insn->src_args[1]].alloc);
orc_neon_emit_unary_quad (p, "vzip.16", 0xf3b60180,
p->vars[insn->dest_args[0]].alloc,
@@ -2132,7 +2135,8 @@ orc_neon_rule_mergelq (OrcCompiler *p, void *user, OrcInstruction *insn)
p->vars[insn->src_args[0]].alloc);
}
- if (p->vars[insn->src_args[1]].last_use != p->insn_index) {
+ if (p->vars[insn->src_args[1]].last_use != p->insn_index ||
+ p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) {
orc_neon_emit_mov (p, p->tmpreg, p->vars[insn->src_args[1]].alloc);
orc_neon_emit_unary (p, "vtrn.32", 0xf3ba0080,
p->vars[insn->dest_args[0]].alloc,
@@ -2148,7 +2152,8 @@ orc_neon_rule_mergelq (OrcCompiler *p, void *user, OrcInstruction *insn)
p->vars[insn->src_args[0]].alloc);
}
- if (p->vars[insn->src_args[1]].last_use != p->insn_index) {
+ if (p->vars[insn->src_args[1]].last_use != p->insn_index ||
+ p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) {
orc_neon_emit_mov_quad (p, p->tmpreg, p->vars[insn->src_args[1]].alloc);
orc_neon_emit_unary_quad (p, "vzip.32", 0xf3ba0180,
p->vars[insn->dest_args[0]].alloc,