summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author George Kyriazis <george.kyriazis@intel.com> 2018-04-09 11:47:37 -0500
committer George Kyriazis <george.kyriazis@intel.com> 2018-04-18 10:51:38 -0500
commit 7f34860125be0359dd3e047cd243b2a9e775a28f (patch)
tree 88cb8a3a1cd1edda747c4f26b81051fff45710df
parent 96ad8f5a2319300870e55f5849e1975fb679b996 (diff)
swr/rast: double-pump in x86 lowering pass
Add support for double-pumping a smaller SIMD width intrinsic.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp 30
1 files changed, 19 insertions, 11 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 9423b28032..983b22733f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -90,11 +90,14 @@ namespace SwrJit
Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
+ Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst, Intrinsic::ID intrin);
+
+ static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
static std::map<std::string, X86Intrinsic> intrinsicMap2[] = {
// 256 wide 512 wide
{ // AVX
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
{"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
{"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
{"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
@@ -104,7 +107,7 @@ namespace SwrJit
{"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
},
{ // AVX2
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
{"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
{"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
{"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
@@ -226,7 +229,15 @@ namespace SwrJit
// Check if there is a native intrinsic for this instruction
Intrinsic::ID id = intrinsic.intrin[vecWidth];
- if (id != Intrinsic::not_intrinsic)
+ if (id == DOUBLE)
+ {
+ // Double pump the next smaller SIMD intrinsic
+ SWR_ASSERT(vecWidth != 0, "Cannot double pump smallest SIMD width.");
+ Intrinsic::ID id2 = intrinsic.intrin[vecWidth - 1];
+ SWR_ASSERT(id2 != Intrinsic::not_intrinsic, "Cannot find intrinsic to double pump.");
+ return DOUBLE_EMU(this, mTarget, vecWidth, pCallInst, id2);
+ }
+ else if (id != Intrinsic::not_intrinsic)
{
Function* pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
SmallVector<Value*, 8> args;
@@ -488,28 +499,25 @@ namespace SwrJit
return cast<Instruction>(v32Gather);
}
-#if 0
// Double pump input using Intrin template arg. This blindly extracts lower and upper 256 from each vector argument and
// calls the 256 wide intrinsic, then merges the results to 512 wide
- template<Intrinsic::ID Intrin>
- Value* EMU_512(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst, Intrinsic::ID intrin)
{
auto B = pThis->B;
SWR_ASSERT(width == W512);
Value* result[2];
- Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrin);
+ Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
for (uint32_t i = 0; i < 2; ++i)
{
SmallVector<Value*, 8> args;
for (auto& arg : pCallInst->arg_operands())
{
- args.push_back(arg.get()->getType()->isVectorTy ? B->EXTRACT_16(arg.get(), i) : arg.get());
+ args.push_back(arg.get()->getType()->isVectorTy() ? B->EXTRACT_16(arg.get(), i) : arg.get());
}
- result[i] = B->CALL(pX86IntrinFunc, args);
+ result[i] = B->CALLA(pX86IntrinFunc, args);
}
- return B->JOIN_16(result[0], result[1]);
+ return cast<Instruction>(B->JOIN_16(result[0], result[1]));
}
-#endif
}