summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marcin Koscielnicki <koriakin@0x04.net> 2016-07-10 14:41:22 +0000
committer Marcin Koscielnicki <koriakin@0x04.net> 2016-07-10 14:41:22 +0000
commit 80f398414d6a2c461fb9b5ff360ff7dfd5e8f1f8 (patch)
tree 8a08a04501302add80827d8f099c8c079ce329e2
parent 7ea0bdecfe7547b4a6c32fd1fbcaafe2559106a1 (diff)
[SystemZ] Utilize Test Data Class instructions.
This adds a new SystemZ-specific intrinsic, llvm.s390.tdc.f(32|64|128), which maps straight to the test data class instructions. A new IR pass is added to recognize instructions that can be converted to TDC and perform the necessary replacements.

Differential Revision: http://reviews.llvm.org/D21949

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275016 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/IR/IntrinsicsSystemZ.td 11
-rw-r--r-- lib/Target/SystemZ/CMakeLists.txt 1
-rw-r--r-- lib/Target/SystemZ/README.txt 4
-rw-r--r-- lib/Target/SystemZ/SystemZ.h 41
-rw-r--r-- lib/Target/SystemZ/SystemZISelLowering.cpp 5
-rw-r--r-- lib/Target/SystemZ/SystemZTDC.cpp 382
-rw-r--r-- lib/Target/SystemZ/SystemZTargetMachine.cpp 3
-rw-r--r-- test/CodeGen/SystemZ/tdc-01.ll 95
-rw-r--r-- test/CodeGen/SystemZ/tdc-02.ll 96
-rw-r--r-- test/CodeGen/SystemZ/tdc-03.ll 139
-rw-r--r-- test/CodeGen/SystemZ/tdc-04.ll 85
-rw-r--r-- test/CodeGen/SystemZ/tdc-05.ll 97
-rw-r--r-- test/CodeGen/SystemZ/tdc-06.ll 48
13 files changed, 1003 insertions, 4 deletions
diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td
index 49de4f9f906..bfc15b9bc09 100644
--- a/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/include/llvm/IR/IntrinsicsSystemZ.td
@@ -374,3 +374,14 @@ let TargetPrefix = "s390" in {
[llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
}
+
+//===----------------------------------------------------------------------===//
+//
+// Misc intrinsics
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "s390" in {
+ def int_s390_tdc : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty, llvm_i64_ty], // i32 result; overloaded FP operand, i64 mask
+ [IntrNoMem]>;
+}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 336f037bb73..4b849ad6491 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -30,6 +30,7 @@ add_llvm_target(SystemZCodeGen
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
SystemZTargetTransformInfo.cpp
+ SystemZTDC.cpp
)
add_subdirectory(AsmParser)
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index 69b72d26020..86a1322c9e2 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -36,10 +36,6 @@ We don't use the BRANCH ON INDEX instructions.
--
-We don't use the TEST DATA CLASS instructions.
-
---
-
We only use MVC, XC and CLC for constant-length block operations.
We could extend them to variable-length operations too,
using EXECUTE RELATIVE LONG.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index cafe2c5948c..c8ea9641fb6 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -87,6 +87,11 @@ const unsigned CCMASK_VCMP_MIXED = CCMASK_1;
const unsigned CCMASK_VCMP_NONE = CCMASK_3;
const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3;
+// Condition-code mask assignments for Test Data Class (used as CCValid for s390_tdc).
+const unsigned CCMASK_TDC_NOMATCH = CCMASK_0;
+const unsigned CCMASK_TDC_MATCH = CCMASK_1;
+const unsigned CCMASK_TDC = CCMASK_TDC_NOMATCH | CCMASK_TDC_MATCH; // both outcomes
+
// The position of the low CC bit in an IPM result.
const unsigned IPM_CC = 28;
@@ -94,6 +99,41 @@ const unsigned IPM_CC = 28;
const unsigned PFD_READ = 1;
const unsigned PFD_WRITE = 2;
+// Mask assignments for TDC: one bit per (class, sign); each *_MINUS is its *_PLUS >> 1.
+const unsigned TDCMASK_ZERO_PLUS = 0x800;
+const unsigned TDCMASK_ZERO_MINUS = 0x400;
+const unsigned TDCMASK_NORMAL_PLUS = 0x200;
+const unsigned TDCMASK_NORMAL_MINUS = 0x100;
+const unsigned TDCMASK_SUBNORMAL_PLUS = 0x080;
+const unsigned TDCMASK_SUBNORMAL_MINUS = 0x040;
+const unsigned TDCMASK_INFINITY_PLUS = 0x020;
+const unsigned TDCMASK_INFINITY_MINUS = 0x010;
+const unsigned TDCMASK_QNAN_PLUS = 0x008;
+const unsigned TDCMASK_QNAN_MINUS = 0x004;
+const unsigned TDCMASK_SNAN_PLUS = 0x002;
+const unsigned TDCMASK_SNAN_MINUS = 0x001;
+
+const unsigned TDCMASK_ZERO = TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS; // either sign of zero
+const unsigned TDCMASK_POSITIVE = TDCMASK_NORMAL_PLUS | // plus-sign classes excluding zero and NaN
+ TDCMASK_SUBNORMAL_PLUS |
+ TDCMASK_INFINITY_PLUS;
+const unsigned TDCMASK_NEGATIVE = TDCMASK_NORMAL_MINUS | // minus-sign classes excluding zero and NaN
+ TDCMASK_SUBNORMAL_MINUS |
+ TDCMASK_INFINITY_MINUS;
+const unsigned TDCMASK_NAN = TDCMASK_QNAN_PLUS | // quiet and signaling NaNs of either sign
+ TDCMASK_QNAN_MINUS |
+ TDCMASK_SNAN_PLUS |
+ TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_PLUS = TDCMASK_POSITIVE | // every *_PLUS class
+ TDCMASK_ZERO_PLUS |
+ TDCMASK_QNAN_PLUS |
+ TDCMASK_SNAN_PLUS;
+const unsigned TDCMASK_MINUS = TDCMASK_NEGATIVE | // every *_MINUS class
+ TDCMASK_ZERO_MINUS |
+ TDCMASK_QNAN_MINUS |
+ TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_ALL = TDCMASK_PLUS | TDCMASK_MINUS; // all twelve bits, 0xfff
+
+
// Number of bits in a vector register.
const unsigned VectorBits = 128;
@@ -138,6 +178,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZTDCPass();
} // end namespace llvm
#endif
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2259840d2d1..5e1552f586f 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1444,6 +1444,11 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
CCValid = SystemZ::CCMASK_VCMP;
return true;
+ case Intrinsic::s390_tdc:
+ Opcode = SystemZISD::TDC;
+ CCValid = SystemZ::CCMASK_TDC;
+ return true;
+
default:
return false;
}
diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp
new file mode 100644
index 00000000000..96a9ef82c12
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTDC.cpp
@@ -0,0 +1,382 @@
+//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for instructions that can be replaced by a Test Data Class
+// instruction, and replaces them when profitable.
+//
+// Roughly, the following rules are recognized:
+//
+// 1: fcmp pred X, 0 -> tdc X, mask
+// 2: fcmp pred X, +-inf -> tdc X, mask
+// 3: fcmp pred X, +-minnorm -> tdc X, mask
+// 4: tdc (fabs X), mask -> tdc X, newmask
+// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
+// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
+// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
+// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
+// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
+// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
+//
+// The pass works in 4 steps:
+//
+// 1. All fcmp and icmp instructions in a function are checked for a match
+// with rules 1-3 and 5-7. Their TDC equivalents are stored in
+// the ConvertedInsts mapping. If the operand of a fcmp instruction is
+// a fabs, it's also folded according to rule 4.
+// 2. All and/or/xor i1 instructions both of whose operands have already been
+// mapped are mapped according to rules 8-10. LogicOpsWorklist holds the
+// instructions still to be checked (processed last-in, first-out).
+// 3. All mapped instructions that are considered worthy of conversion (ie.
+// replacing them will actually simplify the final code) are replaced
+// with a call to the s390.tdc intrinsic.
+// 4. All intermediate results of replaced instructions are removed if unused.
+//
+// Instructions that match rules 1-3 are considered unworthy of conversion
+// on their own (since a comparison instruction is superior), but are mapped
+// in the hopes of folding the result using rules 4 and 8-10 (likely removing
+// the original comparison in the process).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include <deque>
+#include <set>
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeSystemZTDCPassPass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZTDCPass : public FunctionPass { // IR pass replacing FP class tests with llvm.s390.tdc calls.
+public:
+ static char ID;
+ SystemZTDCPass() : FunctionPass(ID) {
+ initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override; // Returns true if F was modified.
+private:
+ // Maps each instruction found to have a TDC equivalent to its
+ // (TDC operand, TDC mask, worthy flag) triple.
+ MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
+ // Worklist of and/or/xor i1 instructions to be potentially folded.
+ std::vector<BinaryOperator *> LogicOpsWorklist;
+ // Instructions matched while folding, to be removed at the end if unused.
+ std::set<Instruction *> PossibleJunk;
+
+ // Tries to map an fcmp against a recognized FP constant to a TDC.
+ void convertFCmp(CmpInst &I);
+
+ // Tries to map an icmp (sign-bit test or test of a tdc result) to a TDC.
+ void convertICmp(CmpInst &I);
+
+ // Tries to convert an i1 and/or/xor instruction, both of whose operands
+ // have already been converted.
+ void convertLogicOp(BinaryOperator &I);
+
+ // Marks I as converted - records (V, Mask, Worthy) for it in ConvertedInsts
+ // and pushes any and/or/xor i1 users of I onto LogicOpsWorklist.
+ void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
+ ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
+ auto &M = *I->getFunction()->getParent();
+ auto &Ctx = M.getContext();
+ for (auto *U : I->users()) {
+ auto *LI = dyn_cast<BinaryOperator>(U); // Null unless the user is a binary operator.
+ if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
+ (LI->getOpcode() == Instruction::And ||
+ LI->getOpcode() == Instruction::Or ||
+ LI->getOpcode() == Instruction::Xor)) {
+ LogicOpsWorklist.push_back(LI); // May be pushed more than once; consumers must tolerate that.
+ }
+ }
+ }
+};
+
+} // end anonymous namespace
+
+char SystemZTDCPass::ID = 0; // Storage for the ID handed to FunctionPass(ID) by the constructor.
+INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
+ "SystemZ Test Data Class optimization", false, false)
+
+FunctionPass *llvm::createSystemZTDCPass() { // Factory declared in SystemZ.h.
+ return new SystemZTDCPass(); // Hands back a newly allocated pass object.
+}
+
+void SystemZTDCPass::convertFCmp(CmpInst &I) { // Rules 1-4 of the file header.
+ Value *Op0 = I.getOperand(0);
+ auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
+ auto Pred = I.getPredicate();
+ // Only comparisons whose second operand is an FP constant are handled.
+ if (!Const)
+ return;
+ // Compute the smallest normal number (and its negation).
+ auto &Sem = Op0->getType()->getFltSemantics();
+ APFloat Smallest = APFloat::getSmallestNormalized(Sem);
+ APFloat NegSmallest = Smallest;
+ NegSmallest.changeSign();
+ // Check if Const is one of our recognized consts.
+ int WhichConst; // Row of Masks: 0 zero, 1 +inf, 2 -inf, 3 minnorm, 4 -minnorm.
+ if (Const->isZero()) {
+ // All comparisons with 0 can be converted.
+ WhichConst = 0;
+ } else if (Const->isInfinity()) {
+ // Likewise for infinities.
+ WhichConst = Const->isNegative() ? 2 : 1;
+ } else if (Const->isExactlyValue(Smallest)) {
+ // No TDC class isolates minnorm, so EQ and GT must be both set or both clear.
+ if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
+ (Pred & CmpInst::FCMP_OGE) != 0)
+ return;
+ WhichConst = 3;
+ } else if (Const->isExactlyValue(NegSmallest)) {
+ // Likewise for NegSmallest: EQ and LT must be both set or both clear.
+ if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
+ (Pred & CmpInst::FCMP_OLE) != 0)
+ return;
+ WhichConst = 4;
+ } else {
+ // Not one of our special constants.
+ return;
+ }
+ // Per-constant partial masks for the EQ, GT, LT, UN outcomes, respectively.
+ static const int Masks[][4] = {
+ { // 0
+ SystemZ::TDCMASK_ZERO, // eq
+ SystemZ::TDCMASK_POSITIVE, // gt
+ SystemZ::TDCMASK_NEGATIVE, // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // inf
+ SystemZ::TDCMASK_INFINITY_PLUS, // eq
+ 0, // gt
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_NEGATIVE |
+ SystemZ::TDCMASK_NORMAL_PLUS |
+ SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // -inf
+ SystemZ::TDCMASK_INFINITY_MINUS, // eq
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_POSITIVE |
+ SystemZ::TDCMASK_NORMAL_MINUS |
+ SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+ 0, // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // minnorm
+ 0, // eq (unsupported)
+ (SystemZ::TDCMASK_NORMAL_PLUS |
+ SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_NEGATIVE |
+ SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
+ SystemZ::TDCMASK_NAN, // un
+ },
+ { // -minnorm
+ 0, // eq (unsupported)
+ (SystemZ::TDCMASK_ZERO |
+ SystemZ::TDCMASK_POSITIVE |
+ SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+ (SystemZ::TDCMASK_NORMAL_MINUS |
+ SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
+ SystemZ::TDCMASK_NAN, // un
+ }
+ };
+ // OR together the partial masks for each outcome bit set in the predicate.
+ int Mask = 0;
+ if (Pred & CmpInst::FCMP_OEQ)
+ Mask |= Masks[WhichConst][0];
+ if (Pred & CmpInst::FCMP_OGT)
+ Mask |= Masks[WhichConst][1];
+ if (Pred & CmpInst::FCMP_OLT)
+ Mask |= Masks[WhichConst][2];
+ if (Pred & CmpInst::FCMP_UNO)
+ Mask |= Masks[WhichConst][3];
+ // A lone fcmp is unworthy of tdc conversion on its own, but may become
+ // worthy if combined with fabs.
+ bool Worthy = false;
+ if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
+ Function *F = CI->getCalledFunction();
+ if (F && F->getIntrinsicID() == Intrinsic::fabs) {
+ // Fold with fabs - adjust the mask appropriately.
+ Mask &= SystemZ::TDCMASK_PLUS; // Keep only the plus-sign classes...
+ Mask |= Mask >> 1; // ...and mirror each onto its minus-sign twin.
+ Op0 = CI->getArgOperand(0); // Test the fabs argument directly.
+ // A combination of fcmp with fabs is a win, unless the constant
+ // involved is 0 (which is handled by later passes).
+ Worthy = WhichConst != 0;
+ PossibleJunk.insert(CI); // The fabs call is erased later if it ends up unused.
+ }
+ }
+ converted(&I, Op0, Mask, Worthy);
+}
+
+void SystemZTDCPass::convertICmp(CmpInst &I) { // Rules 5-7 of the file header.
+ Value *Op0 = I.getOperand(0);
+ auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
+ auto Pred = I.getPredicate();
+ // All our icmp rules require an integer constant as the second operand.
+ if (!Const)
+ return;
+ if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
+ // Check for icmp+bitcast used for signbit; the source must be float, double or fp128.
+ if (!Cast->getSrcTy()->isFloatTy() &&
+ !Cast->getSrcTy()->isDoubleTy() &&
+ !Cast->getSrcTy()->isFP128Ty())
+ return;
+ Value *V = Cast->getOperand(0); // The FP value being bitcast.
+ int Mask;
+ if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
+ // icmp slt (bitcast X), 0 - set if sign bit true
+ Mask = SystemZ::TDCMASK_MINUS;
+ } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
+ // icmp sgt (bitcast X), -1 - set if sign bit false
+ Mask = SystemZ::TDCMASK_PLUS;
+ } else {
+ // Not a sign bit check.
+ return;
+ }
+ PossibleJunk.insert(Cast); // The bitcast is erased later if it ends up unused.
+ converted(&I, V, Mask, true);
+ } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
+ // Check if this is a pre-existing call of our tdc intrinsic.
+ Function *F = CI->getCalledFunction();
+ if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
+ return;
+ if (!Const->isZero()) // Only comparisons of the tdc result against 0 are handled.
+ return;
+ Value *V = CI->getArgOperand(0);
+ auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ // Bail if the mask is not a constant.
+ if (!MaskC)
+ return;
+ int Mask = MaskC->getZExtValue();
+ Mask &= SystemZ::TDCMASK_ALL; // Drop any bits outside the defined class bits.
+ if (Pred == CmpInst::ICMP_NE) {
+ // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
+ } else if (Pred == CmpInst::ICMP_EQ) {
+ // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
+ Mask ^= SystemZ::TDCMASK_ALL;
+ } else {
+ // An unknown comparison - ignore.
+ return;
+ }
+ PossibleJunk.insert(CI); // The original tdc call is erased later if it ends up unused.
+ converted(&I, V, Mask, false); // Unworthy alone: the replacement would again be a tdc call plus icmp.
+ }
+}
+
+void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { // Rules 8-10; both operands must already be in ConvertedInsts.
+ Value *Op0, *Op1;
+ int Mask0, Mask1;
+ bool Worthy0, Worthy1;
+ std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
+ std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
+ if (Op0 != Op1) // Masks can only be merged when both tests classify the same value.
+ return;
+ int Mask;
+ switch (I.getOpcode()) {
+ case Instruction::And:
+ Mask = Mask0 & Mask1;
+ break;
+ case Instruction::Or:
+ Mask = Mask0 | Mask1;
+ break;
+ case Instruction::Xor:
+ Mask = Mask0 ^ Mask1;
+ break;
+ default:
+ llvm_unreachable("Unknown op in convertLogicOp");
+ }
+ converted(&I, Op0, Mask, true); // Always worthy; the operands' worthy flags are not consulted.
+}
+
+bool SystemZTDCPass::runOnFunction(Function &F) { // Returns true iff any instruction was erased or replaced.
+ ConvertedInsts.clear(); // Drop state left over from any previous function.
+ LogicOpsWorklist.clear();
+ PossibleJunk.clear();
+
+ // Look for icmp+fcmp instructions.
+ for (auto &I : instructions(F)) {
+ if (I.getOpcode() == Instruction::FCmp)
+ convertFCmp(cast<CmpInst>(I));
+ else if (I.getOpcode() == Instruction::ICmp)
+ convertICmp(cast<CmpInst>(I));
+ }
+
+ // If none found, bail already.
+ if (ConvertedInsts.empty())
+ return false;
+
+ // Process the worklist of logic instructions (last in, first out).
+ while (!LogicOpsWorklist.empty()) {
+ BinaryOperator *Op = LogicOpsWorklist.back();
+ LogicOpsWorklist.pop_back();
+ // If both operands mapped, and the instruction itself not yet mapped,
+ // convert it.
+ if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) && // A non-instruction operand gives a null key, which is never mapped.
+ ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
+ !ConvertedInsts.count(Op))
+ convertLogicOp(*Op); // May push further logic ops onto the worklist.
+ }
+
+ // Time to actually replace the instructions. Do it in the reverse order
+ // of finding them, since there's a good chance the earlier ones will be
+ // unused (due to being folded into later ones).
+ Module &M = *F.getParent();
+ auto &Ctx = M.getContext();
+ Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ bool MadeChange = false;
+ for (auto &It : reverse(ConvertedInsts)) {
+ Instruction *I = It.first;
+ Value *V;
+ int Mask;
+ bool Worthy;
+ std::tie(V, Mask, Worthy) = It.second;
+ if (!I->user_empty()) {
+ // If used and unworthy of conversion, skip it.
+ if (!Worthy)
+ continue;
+ // Call the intrinsic, compare result with 0.
+ Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
+ V->getType());
+ IRBuilder<> IRB(I); // New instructions are inserted before I.
+ Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
+ Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
+ Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
+ I->replaceAllUsesWith(ICmp);
+ }
+ // If unused, or used and converted, remove it.
+ I->eraseFromParent();
+ MadeChange = true; // Also set when an instruction that was already unused is erased.
+ }
+
+ if (!MadeChange)
+ return false;
+
+ // We've actually done something - now clear misc accumulated junk (fabs,
+ // bitcast, pre-existing tdc calls).
+ for (auto *I : PossibleJunk)
+ if (I->user_empty())
+ I->eraseFromParent();
+
+ return true;
+}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 66a6e85df37..85a3f6f4a8b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -122,6 +122,9 @@ public:
} // end anonymous namespace
void SystemZPassConfig::addIRPasses() {
+ if (getOptLevel() != CodeGenOpt::None) // The TDC pass is only added when optimizing.
+ addPass(createSystemZTDCPass());
+
TargetPassConfig::addIRPasses(); // The base-class IR passes are added after the TDC pass.
}
diff --git a/test/CodeGen/SystemZ/tdc-01.ll b/test/CodeGen/SystemZ/tdc-01.ll
new file mode 100644
index 00000000000..052d895b798
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-01.ll
@@ -0,0 +1,95 @@
+; Test the Test Data Class instruction, selected manually via the intrinsic.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @llvm.s390.tdc.f32(float, i64)
+declare i32 @llvm.s390.tdc.f64(double, i64)
+declare i32 @llvm.s390.tdc.f128(fp128, i64)
+
+; Check using as i32 - f32
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK: tceb %f0, 123
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+ %res = call i32 @llvm.s390.tdc.f32(float %x, i64 123)
+ ret i32 %res
+}
+
+; Check using as i32 - f64
+define i32 @f2(double %x) {
+; CHECK-LABEL: f2
+; CHECK: tcdb %f0, 123
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+ %res = call i32 @llvm.s390.tdc.f64(double %x, i64 123)
+ ret i32 %res
+}
+
+; Check using as i32 - f128
+define i32 @f3(fp128 %x) {
+; CHECK-LABEL: f3
+; CHECK: ld %f0, 0(%r2)
+; CHECK: ld %f2, 8(%r2)
+; CHECK: tcxb %f0, 123
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+ %res = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 123)
+ ret i32 %res
+}
+
+declare void @g()
+
+; Check branch
+define void @f4(float %x) {
+; CHECK-LABEL: f4
+; CHECK: tceb %f0, 123
+; CHECK: jgl g
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tdc.f32(float %x, i64 123)
+ %cond = icmp ne i32 %res, 0
+ br i1 %cond, label %call, label %exit
+
+call:
+ tail call void @g()
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check branch negated
+define void @f5(float %x) {
+; CHECK-LABEL: f5
+; CHECK: tceb %f0, 123
+; CHECK: jge g
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tdc.f32(float %x, i64 123)
+ %cond = icmp eq i32 %res, 0
+ br i1 %cond, label %call, label %exit
+
+call:
+ tail call void @g()
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check non-const mask
+define void @f6(float %x, i64 %y) {
+; CHECK-LABEL: f6
+; CHECK: tceb %f0, 0(%r2)
+; CHECK: jge g
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tdc.f32(float %x, i64 %y)
+ %cond = icmp eq i32 %res, 0
+ br i1 %cond, label %call, label %exit
+
+call:
+ tail call void @g()
+ br label %exit
+
+exit:
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/tdc-02.ll b/test/CodeGen/SystemZ/tdc-02.ll
new file mode 100644
index 00000000000..c0c4ac84349
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-02.ll
@@ -0,0 +1,96 @@
+; Test the Test Data Class instruction logic operation folding.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @llvm.s390.tdc.f32(float, i64)
+declare i32 @llvm.s390.tdc.f64(double, i64)
+declare i32 @llvm.s390.tdc.f128(fp128, i64)
+
+; Check using or i1
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK: tceb %f0, 7
+; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
+; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
+ %a = call i32 @llvm.s390.tdc.f32(float %x, i64 3)
+ %b = call i32 @llvm.s390.tdc.f32(float %x, i64 6)
+ %a1 = icmp ne i32 %a, 0
+ %b1 = icmp ne i32 %b, 0
+ %res = or i1 %a1, %b1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Check using and i1
+define i32 @f2(double %x) {
+; CHECK-LABEL: f2
+; CHECK: tcdb %f0, 2
+; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
+; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
+ %a = call i32 @llvm.s390.tdc.f64(double %x, i64 3)
+ %b = call i32 @llvm.s390.tdc.f64(double %x, i64 6)
+ %a1 = icmp ne i32 %a, 0
+ %b1 = icmp ne i32 %b, 0
+ %res = and i1 %a1, %b1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Check using xor i1
+define i32 @f3(fp128 %x) {
+; CHECK-LABEL: f3
+; CHECK: tcxb %f0, 5
+; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
+; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
+ %a = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 3)
+ %b = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 6)
+ %a1 = icmp ne i32 %a, 0
+ %b1 = icmp ne i32 %b, 0
+ %res = xor i1 %a1, %b1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Check using xor i1 - negated test
+define i32 @f4(fp128 %x) {
+; CHECK-LABEL: f4
+; CHECK: tcxb %f0, 4090
+; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
+; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
+ %a = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 3)
+ %b = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 6)
+ %a1 = icmp ne i32 %a, 0
+ %b1 = icmp eq i32 %b, 0
+ %res = xor i1 %a1, %b1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Check different first args
+define i32 @f5(float %x, float %y) {
+; CHECK-LABEL: f5
+; CHECK-NOT: tceb {{%f[0-9]+}}, 5
+; CHECK-DAG: tceb %f0, 3
+; CHECK-DAG: tceb %f2, 6
+ %a = call i32 @llvm.s390.tdc.f32(float %x, i64 3)
+ %b = call i32 @llvm.s390.tdc.f32(float %y, i64 6)
+ %a1 = icmp ne i32 %a, 0
+ %b1 = icmp ne i32 %b, 0
+ %res = xor i1 %a1, %b1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Non-const mask (not supported)
+define i32 @f6(float %x, i64 %y) {
+; CHECK-LABEL: f6
+; CHECK-DAG: tceb %f0, 0(%r2)
+; CHECK-DAG: tceb %f0, 6
+ %a = call i32 @llvm.s390.tdc.f32(float %x, i64 %y)
+ %b = call i32 @llvm.s390.tdc.f32(float %x, i64 6)
+ %a1 = icmp ne i32 %a, 0
+ %b1 = icmp ne i32 %b, 0
+ %res = xor i1 %a1, %b1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
diff --git a/test/CodeGen/SystemZ/tdc-03.ll b/test/CodeGen/SystemZ/tdc-03.ll
new file mode 100644
index 00000000000..95708f1effc
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-03.ll
@@ -0,0 +1,139 @@
+; Test the Test Data Class instruction logic operation conversion from
+; compares.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+; Compare with 0 (unworthy)
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK-NOT: tceb
+; CHECK: ltebr {{%f[0-9]+}}, %f0
+; CHECK-NOT: tceb
+ %res = fcmp ugt float %x, 0.0
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare fabs with 0 (unworthy)
+define i32 @f2(float %x) {
+; CHECK-LABEL: f2
+; CHECK-NOT: tceb
+; CHECK: lpebr {{%f[0-9]+}}, %f0
+; CHECK-NOT: tceb
+ %y = call float @llvm.fabs.f32(float %x)
+ %res = fcmp ugt float %y, 0.0
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare with inf (unworthy)
+define i32 @f3(float %x) {
+; CHECK-LABEL: f3
+; CHECK-NOT: tceb
+; CHECK: ceb %f0, 0(%r{{[0-9]+}})
+; CHECK-NOT: tceb
+ %res = fcmp ult float %x, 0x7ff0000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare fabs with inf
+define i32 @f4(float %x) {
+; CHECK-LABEL: f4
+; CHECK: tceb %f0, 4047
+ %y = call float @llvm.fabs.f32(float %x)
+ %res = fcmp ult float %y, 0x7ff0000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare with minnorm (unworthy)
+define i32 @f5(float %x) {
+; CHECK-LABEL: f5
+; CHECK-NOT: tceb
+; CHECK: ceb %f0, 0(%r{{[0-9]+}})
+; CHECK-NOT: tceb
+ %res = fcmp ult float %x, 0x3810000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare fabs with minnorm
+define i32 @f6(float %x) {
+; CHECK-LABEL: f6
+; CHECK: tceb %f0, 3279
+ %y = call float @llvm.fabs.f32(float %x)
+ %res = fcmp ult float %y, 0x3810000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare fabs with minnorm, unsupported condition
+define i32 @f7(float %x) {
+; CHECK-LABEL: f7
+; CHECK-NOT: tceb
+; CHECK: lpdfr [[REG:%f[0-9]+]], %f0
+; CHECK: ceb [[REG]], 0(%r{{[0-9]+}})
+; CHECK-NOT: tceb
+ %y = call float @llvm.fabs.f32(float %x)
+ %res = fcmp ugt float %y, 0x3810000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare fabs with unsupported constant
+define i32 @f8(float %x) {
+; CHECK-LABEL: f8
+; CHECK-NOT: tceb
+; CHECK: lpdfr [[REG:%f[0-9]+]], %f0
+; CHECK: ceb [[REG]], 0(%r{{[0-9]+}})
+; CHECK-NOT: tceb
+ %y = call float @llvm.fabs.f32(float %x)
+ %res = fcmp ult float %y, 0x3ff0000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare fabs with minnorm - double
+define i32 @f9(double %x) {
+; CHECK-LABEL: f9
+; CHECK: tcdb %f0, 3279
+ %y = call double @llvm.fabs.f64(double %x)
+ %res = fcmp ult double %y, 0x0010000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare fabs with minnorm - long double
+define i32 @f10(fp128 %x) {
+; CHECK-LABEL: f10
+; CHECK: tcxb %f0, 3279
+ %y = call fp128 @llvm.fabs.f128(fp128 %x)
+ %res = fcmp ult fp128 %y, 0xL00000000000000000001000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare fabs for one with inf - clang's isfinite
+define i32 @f11(double %x) {
+; CHECK-LABEL: f11
+; CHECK: tcdb %f0, 4032
+ %y = call double @llvm.fabs.f64(double %x)
+ %res = fcmp one double %y, 0x7ff0000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare fabs for oeq with inf - clang's isinf
+define i32 @f12(double %x) {
+; CHECK-LABEL: f12
+; CHECK: tcdb %f0, 48
+ %y = call double @llvm.fabs.f64(double %x)
+ %res = fcmp oeq double %y, 0x7ff0000000000000
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
diff --git a/test/CodeGen/SystemZ/tdc-04.ll b/test/CodeGen/SystemZ/tdc-04.ll
new file mode 100644
index 00000000000..929285b0ba8
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-04.ll
@@ -0,0 +1,85 @@
+; Test the Test Data Class instruction logic operation conversion from
+; signbit extraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+;
+
+; Extract sign bit.
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK: tceb %f0, 1365
+ %cast = bitcast float %x to i32
+ %res = icmp slt i32 %cast, 0
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Extract negated sign bit.
+define i32 @f2(float %x) {
+; CHECK-LABEL: f2
+; CHECK: tceb %f0, 2730
+ %cast = bitcast float %x to i32
+ %res = icmp sgt i32 %cast, -1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Extract sign bit.
+define i32 @f3(double %x) {
+; CHECK-LABEL: f3
+; CHECK: tcdb %f0, 1365
+ %cast = bitcast double %x to i64
+ %res = icmp slt i64 %cast, 0
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Extract negated sign bit.
+define i32 @f4(double %x) {
+; CHECK-LABEL: f4
+; CHECK: tcdb %f0, 2730
+ %cast = bitcast double %x to i64
+ %res = icmp sgt i64 %cast, -1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Extract sign bit.
+define i32 @f5(fp128 %x) {
+; CHECK-LABEL: f5
+; CHECK: tcxb %f0, 1365
+ %cast = bitcast fp128 %x to i128
+ %res = icmp slt i128 %cast, 0
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Extract negated sign bit.
+define i32 @f6(fp128 %x) {
+; CHECK-LABEL: f6
+; CHECK: tcxb %f0, 2730
+ %cast = bitcast fp128 %x to i128
+ %res = icmp sgt i128 %cast, -1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Wrong const.
+define i32 @f7(float %x) {
+; CHECK-LABEL: f7
+; CHECK-NOT: tceb
+ %cast = bitcast float %x to i32
+ %res = icmp slt i32 %cast, -1
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Wrong pred.
+define i32 @f8(float %x) {
+; CHECK-LABEL: f8
+; CHECK-NOT: tceb
+ %cast = bitcast float %x to i32
+ %res = icmp eq i32 %cast, 0
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
diff --git a/test/CodeGen/SystemZ/tdc-05.ll b/test/CodeGen/SystemZ/tdc-05.ll
new file mode 100644
index 00000000000..c639a9b7b47
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-05.ll
@@ -0,0 +1,97 @@
+; Test the Test Data Class instruction logic operation conversion from
+; compares, combined with signbit or other compares to ensure worthiness.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+;
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+; Compare with 0, extract sign bit
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK: tceb %f0, 2047
+ %cast = bitcast float %x to i32
+ %sign = icmp slt i32 %cast, 0
+ %fcmp = fcmp ugt float %x, 0.0
+ %res = or i1 %sign, %fcmp
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare with inf, extract negated sign bit
+define i32 @f2(float %x) {
+; CHECK-LABEL: f2
+; CHECK: tceb %f0, 2698
+ %cast = bitcast float %x to i32
+ %sign = icmp sgt i32 %cast, -1
+ %fcmp = fcmp ult float %x, 0x7ff0000000000000
+ %res = and i1 %sign, %fcmp
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Compare with minnorm, extract negated sign bit
+define i32 @f3(float %x) {
+; CHECK-LABEL: f3
+; CHECK: tceb %f0, 2176
+ %cast = bitcast float %x to i32
+ %sign = icmp sgt i32 %cast, -1
+ %fcmp = fcmp olt float %x, 0x3810000000000000
+ %res = and i1 %sign, %fcmp
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Test float isnormal, from clang.
+define i32 @f4(float %x) {
+; CHECK-LABEL: f4
+; CHECK: tceb %f0, 768
+ %y = call float @llvm.fabs.f32(float %x)
+ %ord = fcmp ord float %x, 0.0
+ %a = fcmp ult float %y, 0x7ff0000000000000
+ %b = fcmp uge float %y, 0x3810000000000000
+ %c = and i1 %a, %b
+ %res = and i1 %ord, %c
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Check for negative 0.
+define i32 @f5(float %x) {
+; CHECK-LABEL: f5
+; CHECK: tceb %f0, 1024
+ %cast = bitcast float %x to i32
+ %sign = icmp slt i32 %cast, 0
+ %fcmp = fcmp oeq float %x, 0.0
+ %res = and i1 %sign, %fcmp
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Test isnormal, from clang.
+define i32 @f6(double %x) {
+; CHECK-LABEL: f6
+; CHECK: tcdb %f0, 768
+ %y = call double @llvm.fabs.f64(double %x)
+ %ord = fcmp ord double %x, 0.0
+ %a = fcmp ult double %y, 0x7ff0000000000000
+ %b = fcmp uge double %y, 0x0010000000000000
+ %c = and i1 %ord, %a
+ %res = and i1 %b, %c
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
+
+; Test isinf || isnan, from clang.
+define i32 @f7(double %x) {
+; CHECK-LABEL: f7
+; CHECK: tcdb %f0, 63
+ %y = call double @llvm.fabs.f64(double %x)
+ %a = fcmp oeq double %y, 0x7ff0000000000000
+ %b = fcmp uno double %x, 0.0
+ %res = or i1 %a, %b
+ %xres = zext i1 %res to i32
+ ret i32 %xres
+}
diff --git a/test/CodeGen/SystemZ/tdc-06.ll b/test/CodeGen/SystemZ/tdc-06.ll
new file mode 100644
index 00000000000..11fb1e2916e
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-06.ll
@@ -0,0 +1,48 @@
+; Test the Test Data Class instruction, as used by fpclassify.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+;
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+define i32 @fpc(double %x) {
+entry:
+; CHECK-LABEL: fpc
+; CHECK: lhi %r2, 5
+; CHECK: ltdbr %f0, %f0
+; CHECK: je [[RET:.L.*]]
+ %testeq = fcmp oeq double %x, 0.000000e+00
+ br i1 %testeq, label %ret, label %nonzero
+
+nonzero:
+; CHECK: lhi %r2, 1
+; CHECK: cdbr %f0, %f0
+; CHECK: jo [[RET]]
+ %testnan = fcmp uno double %x, 0.000000e+00
+ br i1 %testnan, label %ret, label %nonzeroord
+
+nonzeroord:
+; CHECK: lhi %r2, 2
+; CHECK: tcdb %f0, 48
+; CHECK: jl [[RET]]
+ %abs = tail call double @llvm.fabs.f64(double %x)
+ %testinf = fcmp oeq double %abs, 0x7FF0000000000000
+ br i1 %testinf, label %ret, label %finite
+
+finite:
+; CHECK: lhi %r2, 3
+; CHECK: tcdb %f0, 831
+; CHECK: blr %r14
+; CHECK: lhi %r2, 4
+ %testnormal = fcmp uge double %abs, 0x10000000000000
+ %finres = select i1 %testnormal, i32 3, i32 4
+ br label %ret
+
+ret:
+; CHECK: [[RET]]:
+; CHECK: br %r14
+ %res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ]
+ ret i32 %res
+}