summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Simpson <mssimpso@codeaurora.org>2015-12-21 18:31:25 +0000
committerMatthew Simpson <mssimpso@codeaurora.org>2015-12-21 18:31:25 +0000
commit0ce5d69ee3fb6415ed625805b50ddedc2ee6964f (patch)
tree777bae92280897abb8bd65ffc583c3f60735c762
parent8c9c853ff4829cf9e532278d1a6e11987bcf2dbf (diff)
[AArch64] Add additional extract-extend patterns for smov
This patch adds to the target description two additional patterns for matching extract-extend operations to SMOV. The patterns catch the v16i8-to-i64 and v8i16-to-i64 cases. The existing patterns miss these cases because the extracted elements must first be legalized to i32, resulting in any_extend nodes. This was originally implemented as a DAG combine (r255895), but was reverted due to failing out-of-tree tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256176 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td7
-rw-r--r--test/CodeGen/AArch64/arm64-neon-copy.ll17
2 files changed, 15 insertions, 9 deletions
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 70a1f849f1a..d02bc9ff394 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3806,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
(i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+ VectorIndexB:$idx)))), i8),
+ (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+ VectorIndexH:$idx)))), i16),
+ (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index b74a40626ce..83b1cac70f5 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -320,21 +320,20 @@ define i32 @smovw8h(<8 x i16> %tmp1) {
ret i32 %tmp5
}
-define i32 @smovx16b(<16 x i8> %tmp1) {
+define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
%tmp3 = extractelement <16 x i8> %tmp1, i32 8
- %tmp4 = sext i8 %tmp3 to i32
- %tmp5 = add i32 %tmp4, %tmp4
- ret i32 %tmp5
+ %tmp4 = sext i8 %tmp3 to i64
+ ret i64 %tmp4
}
-define i32 @smovx8h(<8 x i16> %tmp1) {
+define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
%tmp3 = extractelement <8 x i16> %tmp1, i32 2
- %tmp4 = sext i16 %tmp3 to i32
- ret i32 %tmp4
+ %tmp4 = sext i16 %tmp3 to i64
+ ret i64 %tmp4
}
define i64 @smovx4s(<4 x i32> %tmp1) {