author     Matthew Simpson <mssimpso@codeaurora.org>    2015-12-21 18:31:25 +0000
committer  Matthew Simpson <mssimpso@codeaurora.org>    2015-12-21 18:31:25 +0000
commit     0ce5d69ee3fb6415ed625805b50ddedc2ee6964f
tree       777bae92280897abb8bd65ffc583c3f60735c762
parent     8c9c853ff4829cf9e532278d1a6e11987bcf2dbf
[AArch64] Add additional extract-extend patterns for smov
This patch adds two additional patterns to the target description for matching
extract-extend operations to SMOV. The new patterns cover the v16i8-to-i64 and
v8i16-to-i64 cases. The existing patterns miss these cases because the
extracted i8/i16 elements must first be legalized to i32, so the DAG contains
any_extend nodes that the existing sext-based patterns do not match.
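As an illustration, the v16i8-to-i64 case corresponds to IR like the following
minimal sketch (adapted from the smovx16b test updated below); the concrete
register names in the trailing comment are illustrative, since the test only
checks the operand classes:

    ; Extract an i8 lane and sign-extend it to i64. Because i8 is not a
    ; legal type on AArch64, legalization extends the extracted element to
    ; i32 first, so the DAG sees (sext_inreg (anyext ...)) instead of a
    ; plain sext; the new patterns match that shape and select SMOVvi8to64.
    define i64 @smovx16b(<16 x i8> %v) {
      %elt = extractelement <16 x i8> %v, i32 8
      %ext = sext i8 %elt to i64
      ret i64 %ext        ; selected as a single: smov x0, v0.b[8]
    }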
This was originally implemented as a DAG combine (r255895), but was reverted
due to failing out-of-tree tests.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256176 91177308-0d34-0410-b5e6-96231b3b80d8
 lib/Target/AArch64/AArch64InstrInfo.td  |  7 +++++++
 test/CodeGen/AArch64/arm64-neon-copy.ll | 17 ++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 70a1f849f1a..d02bc9ff394 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3806,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
 def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
           (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
 
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+            VectorIndexB:$idx)))), i8),
+          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+            VectorIndexH:$idx)))), i16),
+          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
 // Extracting i8 or i16 elements will have the zero-extend transformed to
 // an 'and' mask by type legalization since neither i8 nor i16 are legal types
 // for AArch64. Match these patterns here since UMOV already zeroes out the high
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index b74a40626ce..83b1cac70f5 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -320,21 +320,20 @@ define i32 @smovw8h(<8 x i16> %tmp1) {
   ret i32 %tmp5
 }
 
-define i32 @smovx16b(<16 x i8> %tmp1) {
+define i64 @smovx16b(<16 x i8> %tmp1) {
 ; CHECK-LABEL: smovx16b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
+  %tmp4 = sext i8 %tmp3 to i64
+  ret i64 %tmp4
 }
 
-define i32 @smovx8h(<8 x i16> %tmp1) {
+define i64 @smovx8h(<8 x i16> %tmp1) {
 ; CHECK-LABEL: smovx8h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  ret i32 %tmp4
+  %tmp4 = sext i16 %tmp3 to i64
+  ret i64 %tmp4
 }
 
 define i64 @smovx4s(<4 x i32> %tmp1) {