diff options
author | Wim Taymans <wtaymans@redhat.com> | 2014-12-09 16:54:19 +0100 |
---|---|---|
committer | Wim Taymans <wtaymans@redhat.com> | 2014-12-09 18:01:37 +0100 |
commit | 2a187f7f1bc91a5ba7dbc608f7c0a98016e45651 (patch) | |
tree | bff31cc5999a2517a30367e9118f10cfc62615cb | |
parent | f1cfa5bba9824374d769e312381d8f5d85a417bc (diff) |
neon: vtrn and vzip are undefined when src == dest
Make sure we use different src and dest registers for vtrn and vzip,
because the result of these instructions is undefined when both operands are the same register.
-rw-r--r-- | orc/orcrules-neon.c | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c index 9959160..83b33d3 100644 --- a/orc/orcrules-neon.c +++ b/orc/orcrules-neon.c @@ -2062,7 +2062,8 @@ orc_neon_rule_mergebw (OrcCompiler *p, void *user, OrcInstruction *insn) p->vars[insn->src_args[0]].alloc); } - if (p->vars[insn->src_args[1]].last_use != p->insn_index) { + if (p->vars[insn->src_args[1]].last_use != p->insn_index || + p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) { orc_neon_emit_mov (p, p->tmpreg, p->vars[insn->src_args[1]].alloc); orc_neon_emit_unary (p, "vzip.8", 0xf3b20180, p->vars[insn->dest_args[0]].alloc, @@ -2094,7 +2095,8 @@ orc_neon_rule_mergewl (OrcCompiler *p, void *user, OrcInstruction *insn) p->vars[insn->src_args[0]].alloc); } - if (p->vars[insn->src_args[1]].last_use != p->insn_index) { + if (p->vars[insn->src_args[1]].last_use != p->insn_index || + p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) { orc_neon_emit_mov (p, p->tmpreg, p->vars[insn->src_args[1]].alloc); orc_neon_emit_unary (p, "vzip.16", 0xf3b60180, p->vars[insn->dest_args[0]].alloc, @@ -2110,7 +2112,8 @@ orc_neon_rule_mergewl (OrcCompiler *p, void *user, OrcInstruction *insn) p->vars[insn->src_args[0]].alloc); } - if (p->vars[insn->src_args[1]].last_use != p->insn_index) { + if (p->vars[insn->src_args[1]].last_use != p->insn_index || + p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) { orc_neon_emit_mov_quad (p, p->tmpreg, p->vars[insn->src_args[1]].alloc); orc_neon_emit_unary_quad (p, "vzip.16", 0xf3b60180, p->vars[insn->dest_args[0]].alloc, @@ -2132,7 +2135,8 @@ orc_neon_rule_mergelq (OrcCompiler *p, void *user, OrcInstruction *insn) p->vars[insn->src_args[0]].alloc); } - if (p->vars[insn->src_args[1]].last_use != p->insn_index) { + if (p->vars[insn->src_args[1]].last_use != p->insn_index || + p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) { orc_neon_emit_mov (p, p->tmpreg, p->vars[insn->src_args[1]].alloc); 
orc_neon_emit_unary (p, "vtrn.32", 0xf3ba0080, p->vars[insn->dest_args[0]].alloc, @@ -2148,7 +2152,8 @@ orc_neon_rule_mergelq (OrcCompiler *p, void *user, OrcInstruction *insn) p->vars[insn->src_args[0]].alloc); } - if (p->vars[insn->src_args[1]].last_use != p->insn_index) { + if (p->vars[insn->src_args[1]].last_use != p->insn_index || + p->vars[insn->src_args[1]].alloc == p->vars[insn->dest_args[0]].alloc) { orc_neon_emit_mov_quad (p, p->tmpreg, p->vars[insn->src_args[1]].alloc); orc_neon_emit_unary_quad (p, "vzip.32", 0xf3ba0180, p->vars[insn->dest_args[0]].alloc, |