summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Gohman <gohman@apple.com>2009-09-21 18:03:22 +0000
committerDan Gohman <gohman@apple.com>2009-09-21 18:03:22 +0000
commit670e53977bf289009bb460538987542c9c46ac90 (patch)
tree10ca07ba7f1ced6a7e9d48bc821dfd61416b149b
parentb29ff977d10ddc90fd993d64d25f5c3c6ac935aa (diff)
Recognize SSE min and max opportunities in even more cases.
And fix a bug with the behavior of min/max instructions formed from fcmp uge comparisons. Also, use FiniteOnlyFPMath() for this code instead of UnsafeFPMath, as it is more specific. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82466 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp156
-rw-r--r--test/CodeGen/X86/scalar-min-max-fill-operand.ll6
-rw-r--r--test/CodeGen/X86/sse-minmax.ll321
3 files changed, 442 insertions, 41 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1ce5137799..fe94418d10 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8256,76 +8256,158 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
- // If we have SSE[12] support, try to form min/max nodes.
+ // If we have SSE[12] support, try to form min/max nodes. SSE min/max
+ // instructions have the peculiarity that if either operand is a NaN,
+ // they chose what we call the RHS operand (and as such are not symmetric).
+ // It happens that this matches the semantics of the common C idiom
+ // x<y?x:y and related forms, so we can recognize these cases.
if (Subtarget->hasSSE2() &&
(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
Cond.getOpcode() == ISD::SETCC) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
+ // Check for x CC y ? x : y.
if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
switch (CC) {
default: break;
- case ISD::SETOLE: // (X <= Y) ? X : Y -> min
+ case ISD::SETULT:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETOLE:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
case ISD::SETULE:
- case ISD::SETLE:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
+ // This can be a min, but if either operand is a NaN we need it to
+ // preserve the original LHS.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
case ISD::SETLT:
+ case ISD::SETLE:
Opcode = X86ISD::FMIN;
break;
- case ISD::SETOGT: // (X > Y) ? X : Y -> max
+ case ISD::SETOGE:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
case ISD::SETUGT:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETUGE:
+ // This can be a max, but if either operand is a NaN we need it to
+ // preserve the original LHS.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
case ISD::SETGT:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
switch (CC) {
default: break;
- case ISD::SETOGT:
- // This can use a min only if the LHS isn't NaN.
- if (DAG.isKnownNeverNaN(LHS))
- Opcode = X86ISD::FMIN;
- else if (DAG.isKnownNeverNaN(RHS)) {
- Opcode = X86ISD::FMIN;
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
+ case ISD::SETOGE:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
}
+ Opcode = X86ISD::FMIN;
break;
-
- case ISD::SETUGT: // (X > Y) ? Y : X -> min
+ case ISD::SETUGT:
+ // This can be a min if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ }
+ Opcode = X86ISD::FMIN;
+ break;
+ case ISD::SETUGE:
+ // This can be a min, but if either operand is a NaN we need it to
+ // preserve the original LHS.
+ std::swap(LHS, RHS);
+ case ISD::SETOGT:
case ISD::SETGT:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
- case ISD::SETULE:
- // This can use a max only if the LHS isn't NaN.
- if (DAG.isKnownNeverNaN(LHS))
- Opcode = X86ISD::FMAX;
- else if (DAG.isKnownNeverNaN(RHS)) {
- Opcode = X86ISD::FMAX;
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
+ case ISD::SETULT:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(LHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(RHS))
+ break;
}
+ Opcode = X86ISD::FMAX;
break;
-
- case ISD::SETOLE: // (X <= Y) ? Y : X -> max
- case ISD::SETLE:
- if (!UnsafeFPMath) break;
- // FALL THROUGH.
- case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
+ case ISD::SETOLE:
+ // This can be a max if we can prove that at least one of the operands
+ // is not a nan.
+ if (!FiniteOnlyFPMath()) {
+ if (DAG.isKnownNeverNaN(RHS)) {
+ // Put the potential NaN in the RHS so that SSE will preserve it.
+ std::swap(LHS, RHS);
+ } else if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ }
+ Opcode = X86ISD::FMAX;
+ break;
+ case ISD::SETULE:
+ // This can be a max, but if either operand is a NaN we need it to
+ // preserve the original LHS.
+ std::swap(LHS, RHS);
+ case ISD::SETOLT:
case ISD::SETLT:
+ case ISD::SETLE:
Opcode = X86ISD::FMAX;
break;
}
diff --git a/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
index bda50ccd58..fe40758d8e 100644
--- a/test/CodeGen/X86/scalar-min-max-fill-operand.ll
+++ b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
@@ -4,17 +4,17 @@
declare float @bar()
-define float @foo(float %a)
+define float @foo(float %a) nounwind
{
%s = call float @bar()
%t = fcmp olt float %s, %a
%u = select i1 %t, float %s, float %a
ret float %u
}
-define float @hem(float %a)
+define float @hem(float %a) nounwind
{
%s = call float @bar()
- %t = fcmp uge float %s, %a
+ %t = fcmp ogt float %s, %a
%u = select i1 %t, float %s, float %a
ret float %u
}
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 9952834253..17ffb5e464 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,4 +1,323 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+
+; Some of these patterns can be matched as SSE min or max. Some of
+; then can be matched provided that the operands are swapped.
+; Some of them can't be matched at all and require a comparison
+; and a conditional branch.
+
+; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse}
+; x_ : use 0.0 instead of %y
+; _inverse : swap the arms of the select.
+
+; CHECK: ogt:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt(double %x, double %y) nounwind {
+ %c = fcmp ogt double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: olt:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt(double %x, double %y) nounwind {
+ %c = fcmp olt double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ogt_inverse:
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt_inverse(double %x, double %y) nounwind {
+ %c = fcmp ogt double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: olt_inverse:
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt_inverse(double %x, double %y) nounwind {
+ %c = fcmp olt double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: oge:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge(double %x, double %y) nounwind {
+ %c = fcmp oge double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ole:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole(double %x, double %y) nounwind {
+ %c = fcmp ole double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: oge_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge_inverse(double %x, double %y) nounwind {
+ %c = fcmp oge double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: ole_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole_inverse(double %x, double %y) nounwind {
+ %c = fcmp ole double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: x_ogt:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt(double %x) nounwind {
+ %c = fcmp ogt double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_olt:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt(double %x) nounwind {
+ %c = fcmp olt double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ogt_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt_inverse(double %x) nounwind {
+ %c = fcmp ogt double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_olt_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt_inverse(double %x) nounwind {
+ %c = fcmp olt double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_oge:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge(double %x) nounwind {
+ %c = fcmp oge double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ole:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole(double %x) nounwind {
+ %c = fcmp ole double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_oge_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge_inverse(double %x) nounwind {
+ %c = fcmp oge double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_ole_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole_inverse(double %x) nounwind {
+ %c = fcmp ole double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: ugt:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt(double %x, double %y) nounwind {
+ %c = fcmp ugt double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ult:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult(double %x, double %y) nounwind {
+ %c = fcmp ult double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ugt_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt_inverse(double %x, double %y) nounwind {
+ %c = fcmp ugt double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: ult_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult_inverse(double %x, double %y) nounwind {
+ %c = fcmp ult double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: uge:
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge(double %x, double %y) nounwind {
+ %c = fcmp uge double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: ule:
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule(double %x, double %y) nounwind {
+ %c = fcmp ule double %x, %y
+ %d = select i1 %c, double %x, double %y
+ ret double %d
+}
+
+; CHECK: uge_inverse:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge_inverse(double %x, double %y) nounwind {
+ %c = fcmp uge double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: ule_inverse:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule_inverse(double %x, double %y) nounwind {
+ %c = fcmp ule double %x, %y
+ %d = select i1 %c, double %y, double %x
+ ret double %d
+}
+
+; CHECK: x_ugt:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt(double %x) nounwind {
+ %c = fcmp ugt double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ult:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult(double %x) nounwind {
+ %c = fcmp ult double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ugt_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt_inverse(double %x) nounwind {
+ %c = fcmp ugt double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_ult_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult_inverse(double %x) nounwind {
+ %c = fcmp ult double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_uge:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge(double %x) nounwind {
+ %c = fcmp uge double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_ule:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule(double %x) nounwind {
+ %c = fcmp ule double %x, 0.000000e+00
+ %d = select i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+; CHECK: x_uge_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge_inverse(double %x) nounwind {
+ %c = fcmp uge double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; CHECK: x_ule_inverse:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule_inverse(double %x) nounwind {
+ %c = fcmp ule double %x, 0.000000e+00
+ %d = select i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+; Test a few more misc. cases.
; CHECK: clampTo3k_a:
; CHECK: minsd