summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2013-04-26 09:19:19 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2013-04-26 09:19:19 +0000
commit6242fda42ad13eebc908e744426ae7bc8cf8d1c3 (patch)
tree71212f305027e461844fc0631056819517c0d0ea
parent7557e521e50180817a165f8c897220b0e2020b7b (diff)
DAGCombiner: Canonicalize vector integer abs in the same way we do it for scalars.
This already helps SSE2 x86 a lot because it lacks an efficient way to represent a vector select. The long term goal is to enable the backend to match a canonicalized pattern into a single instruction (e.g. vabs or pabs). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180597 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp42
-rw-r--r--test/CodeGen/X86/viabs.ll66
2 files changed, 108 insertions, 0 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f67cd3e2709..05a58b1c1b2 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -205,6 +205,7 @@ namespace {
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
+ SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
@@ -1126,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
+ case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
@@ -4162,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Canonicalize integer abs.
+ // vselect (setg[te] X, 0), X, -X ->
+ // vselect (setgt X, -1), X, -X ->
+ // vselect (setl[te] X, 0), -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ bool isAbs = false;
+ bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
+ N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
+ else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
+ N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+
+ if (isAbs) {
+ EVT VT = LHS.getValueType();
+ SDValue Shift = DAG.getNode(
+ ISD::SRA, DL, VT, LHS,
+ DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
+ }
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
diff --git a/test/CodeGen/X86/viabs.ll b/test/CodeGen/X86/viabs.ll
new file mode 100644
index 00000000000..a509d8aa81e
--- /dev/null
+++ b/test/CodeGen/X86/viabs.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2
+
+define <4 x i32> @test1(<4 x i32> %a) nounwind {
+; SSE2: test1:
+; SSE2: movdqa
+; SSE2-NEXT: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+ %tmp1neg = sub <4 x i32> zeroinitializer, %a
+ %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+ ret <4 x i32> %abs
+}
+
+define <4 x i32> @test2(<4 x i32> %a) nounwind {
+; SSE2: test2:
+; SSE2: movdqa
+; SSE2-NEXT: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+ %tmp1neg = sub <4 x i32> zeroinitializer, %a
+ %b = icmp sge <4 x i32> %a, zeroinitializer
+ %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+ ret <4 x i32> %abs
+}
+
+define <4 x i32> @test3(<4 x i32> %a) nounwind {
+; SSE2: test3:
+; SSE2: movdqa
+; SSE2-NEXT: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+ %tmp1neg = sub <4 x i32> zeroinitializer, %a
+ %b = icmp sgt <4 x i32> %a, zeroinitializer
+ %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+ ret <4 x i32> %abs
+}
+
+define <4 x i32> @test4(<4 x i32> %a) nounwind {
+; SSE2: test4:
+; SSE2: movdqa
+; SSE2-NEXT: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+ %tmp1neg = sub <4 x i32> zeroinitializer, %a
+ %b = icmp slt <4 x i32> %a, zeroinitializer
+ %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
+ ret <4 x i32> %abs
+}
+
+define <4 x i32> @test5(<4 x i32> %a) nounwind {
+; SSE2: test5:
+; SSE2: movdqa
+; SSE2-NEXT: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+ %tmp1neg = sub <4 x i32> zeroinitializer, %a
+ %b = icmp sle <4 x i32> %a, zeroinitializer
+ %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
+ ret <4 x i32> %abs
+}