diff options
author | Alexey Sotkin <alexey.sotkin@intel.com> | 2017-07-10 17:39:01 +0300 |
---|---|---|
committer | Alexey Sotkin <alexey.sotkin@intel.com> | 2018-04-03 17:17:38 +0300 |
commit | 5d0e2816c2649937ad731f7b72d04311e7fa2e65 (patch) | |
tree | 4d1af522f407dde941a01346bdc04db7c2204c43 | |
parent | 223c78694394a61819ebc3e9b834dd8a38ec8678 (diff) |
Upgrade SPIRV translator from LLVM 3.8 to LLVM 4.0
Change-Id: I28e88c5b38463e14a4af4fc0e49e8b95c8debef0
-rw-r--r-- | lib/SPIRV/SPIRVReader.cpp | 429 | ||||
-rw-r--r-- | test/ExecutionMode_SPIR_to_SPIRV.ll | 6 | ||||
-rw-r--r-- | test/image_without_access_qualifier.spt | 94 | ||||
-rw-r--r-- | test/transcoding/check_ro_qualifier.ll | 13 | ||||
-rw-r--r-- | test/transcoding/check_wo_qualifier.ll | 13 | ||||
-rw-r--r-- | test/transcoding/cl-types.ll | 37 | ||||
-rw-r--r-- | test/transcoding/device_execution.ll | 5 | ||||
-rw-r--r-- | test/transcoding/device_execution_multiple_blocks.ll | 28 | ||||
-rw-r--r-- | test/transcoding/device_execution_overloading.ll | 12 | ||||
-rw-r--r-- | test/transcoding/device_execution_simple_local_memory.ll | 12 | ||||
-rw-r--r-- | test/transcoding/device_execution_vaargs.ll | 231 | ||||
-rw-r--r-- | test/transcoding/spirv-types.ll | 39 | ||||
-rw-r--r-- | test/vec_type_hint.ll | 32 |
13 files changed, 709 insertions, 242 deletions
diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp index efc8952..808ea58 100644 --- a/lib/SPIRV/SPIRVReader.cpp +++ b/lib/SPIRV/SPIRVReader.cpp @@ -124,12 +124,12 @@ dumpLLVM(Module *M, const std::string &FName) { }
static MDNode*
-getMDNodeStringIntVec(LLVMContext *Context, const std::string& Str,
- const std::vector<SPIRVWord>& IntVals) {
+getMDNodeStringIntVec(LLVMContext *Context,
+ const std::vector<SPIRVWord>& IntVals) {
std::vector<Metadata*> ValueVec;
- ValueVec.push_back(MDString::get(*Context, Str));
for (auto &I:IntVals)
- ValueVec.push_back(ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(*Context), I)));
+ ValueVec.push_back(ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(*Context), I)));
return MDNode::get(*Context, ValueVec);
}
@@ -168,15 +168,14 @@ addNamedMetadataStringSet(LLVMContext *Context, Module *M, }
static void
-addOCLKernelArgumentMetadata(LLVMContext *Context,
- std::vector<llvm::Metadata*> &KernelMD, const std::string &MDName,
- SPIRVFunction *BF, std::function<Metadata *(SPIRVFunctionParameter *)>Func){
+addOCLKernelArgumentMetadata(LLVMContext *Context, const std::string &MDName,
+ SPIRVFunction *BF, llvm::Function *Fn,
+ std::function<Metadata *(SPIRVFunctionParameter *)>Func) {
std::vector<Metadata*> ValueVec;
- ValueVec.push_back(MDString::get(*Context, MDName));
BF->foreachArgument([&](SPIRVFunctionParameter *Arg) {
ValueVec.push_back(Func(Arg));
});
- KernelMD.push_back(MDNode::get(*Context, ValueVec));
+ Fn->setMetadata(MDName, MDNode::get(*Context, ValueVec));
}
class SPIRVToLLVMDbgTran {
@@ -297,12 +296,17 @@ public: bool CreatePlaceHolder = true);
Value *transValueWithoutDecoration(SPIRVValue *, Function *F, BasicBlock *,
bool CreatePlaceHolder = true);
+ Value *transDeviceEvent(SPIRVValue *BV, Function *F, BasicBlock *BB);
+ Value *transEnqueuedBlock(SPIRVValue *BF, SPIRVValue *BC, SPIRVValue *BCSize,
+ SPIRVValue *BCAligment, Function *F, BasicBlock *BB);
bool transDecoration(SPIRVValue *, Value *);
bool transAlign(SPIRVValue *, Value *);
Instruction *transOCLBuiltinFromExtInst(SPIRVExtInst *BC, BasicBlock *BB);
std::vector<Value *> transValue(const std::vector<SPIRVValue *>&, Function *F,
BasicBlock *);
Function *transFunction(SPIRVFunction *F);
+ Instruction *transEnqueueKernelBI(SPIRVInstruction *BI, BasicBlock *BB);
+ Instruction *transWGSizeBI(SPIRVInstruction *BI, BasicBlock *BB);
bool transFPContractMetadata();
bool transKernelMetadata();
bool transNonTemporalMetadata(Instruction *I);
@@ -328,13 +332,6 @@ public: /// need to be post-processed to return the struct through sret argument.
bool postProcessOCLBuiltinReturnStruct(Function *F);
- /// \brief Post-process OpenCL builtin functions having block argument.
- ///
- /// These functions are translated to functions with function pointer type
- /// argument first, then post-processed to have block argument.
- bool postProcessOCLBuiltinWithFuncPointer(Function *F,
- Function::arg_iterator I);
-
/// \brief Post-process OpenCL builtin functions having array argument.
///
/// These functions are translated to functions with array type argument
@@ -387,6 +384,7 @@ public: typedef DenseMap<SPIRVType *, Type *> SPIRVToLLVMTypeMap;
typedef DenseMap<SPIRVValue *, Value *> SPIRVToLLVMValueMap;
+ typedef DenseMap<SPIRVValue *, Value *> SPIRVBlockToLLVMStructMap;
typedef DenseMap<SPIRVFunction *, Function *> SPIRVToLLVMFunctionMap;
typedef DenseMap<GlobalVariable *, SPIRVBuiltinVariableKind> BuiltinVarMap;
@@ -402,6 +400,7 @@ private: SPIRVToLLVMTypeMap TypeMap;
SPIRVToLLVMValueMap ValueMap;
SPIRVToLLVMFunctionMap FuncMap;
+ SPIRVBlockToLLVMStructMap BlockMap;
SPIRVToLLVMPlaceholderMap PlaceholderMap;
SPIRVToLLVMDbgTran DbgTran;
@@ -781,6 +780,7 @@ SPIRVToLLVM::transType(SPIRVType *T, bool IsClassMember) { return 0;
}
+
std::string
SPIRVToLLVM::transTypeToOCLTypeName(SPIRVType *T, bool IsSigned) {
switch(T->getOpCode()) {
@@ -906,30 +906,30 @@ void SPIRVToLLVM::setLLVMLoopMetadata(SPIRVLoopMerge* LM, BranchInst* BI) {
if (!LM)
return;
- llvm::MDString *Name = nullptr; - auto Temp = MDNode::getTemporary(*Context, None); - auto Self = MDNode::get(*Context, Temp); - Self->replaceOperandWith(0, Self); - MDNode::deleteTemporary(Temp); - - if (LM->getLoopControl() == LoopControlMaskNone) { - BI->setMetadata("llvm.loop", Self); - return; - } - else if(LM->getLoopControl() == LoopControlUnrollMask) - Name = llvm::MDString::get(*Context, "llvm.loop.unroll.full"); - else if (LM->getLoopControl() == LoopControlDontUnrollMask) - Name = llvm::MDString::get(*Context, "llvm.loop.unroll.disable"); - else - return; - - std::vector<llvm::Metadata *> OpValues(1, Name); - SmallVector<llvm::Metadata *, 2> Metadata; - Metadata.push_back(llvm::MDNode::get(*Context, Self)); - Metadata.push_back(llvm::MDNode::get(*Context, OpValues)); - - llvm::MDNode *Node = llvm::MDNode::get(*Context, Metadata); - Node->replaceOperandWith(0, Node); + llvm::MDString *Name = nullptr;
+ auto Temp = MDNode::getTemporary(*Context, None);
+ auto Self = MDNode::get(*Context, Temp);
+ Self->replaceOperandWith(0, Self);
+ MDNode::deleteTemporary(Temp);
+
+ if (LM->getLoopControl() == LoopControlMaskNone) {
+ BI->setMetadata("llvm.loop", Self);
+ return;
+ }
+ else if(LM->getLoopControl() == LoopControlUnrollMask)
+ Name = llvm::MDString::get(*Context, "llvm.loop.unroll.full");
+ else if (LM->getLoopControl() == LoopControlDontUnrollMask)
+ Name = llvm::MDString::get(*Context, "llvm.loop.unroll.disable");
+ else
+ return;
+
+ std::vector<llvm::Metadata *> OpValues(1, Name);
+ SmallVector<llvm::Metadata *, 2> Metadata;
+ Metadata.push_back(llvm::MDNode::get(*Context, Self));
+ Metadata.push_back(llvm::MDNode::get(*Context, OpValues));
+
+ llvm::MDNode *Node = llvm::MDNode::get(*Context, Metadata);
+ Node->replaceOperandWith(0, Node);
BI->setMetadata("llvm.loop", Node);
}
@@ -969,6 +969,19 @@ SPIRVToLLVM::transValue(SPIRVValue *BV, Function *F, BasicBlock *BB, }
Value *
+SPIRVToLLVM::transDeviceEvent(SPIRVValue *BV, Function *F, BasicBlock *BB) {
+ auto Val = transValue(BV, F, BB, false);
+ auto Ty = dyn_cast<PointerType>(Val->getType());
+ assert(Ty && "Invalid Device Event");
+ if (Ty->getAddressSpace() == SPIRAS_Generic)
+ return Val;
+
+ IRBuilder<> Builder(BB);
+ auto EventTy = PointerType::get(Ty->getElementType(), SPIRAS_Generic);
+ return Builder.CreateAddrSpaceCast(Val, EventTy);
+}
+
+Value *
SPIRVToLLVM::transConvertInst(SPIRVValue* BV, Function* F, BasicBlock* BB) {
SPIRVUnary* BC = static_cast<SPIRVUnary*>(BV);
auto Src = transValue(BC->getOperand(0), F, BB, BB ? true : false);
@@ -1060,16 +1073,6 @@ SPIRVToLLVM::postProcessOCL() { for (auto I = M->begin(), E = M->end(); I != E;) {
auto F = I++;
if (F->hasName() && F->isDeclaration()) {
- DEBUG(dbgs() << "[postProcessOCL func ptr] " << *F << '\n');
- auto AI = F->arg_begin();
- if (hasFunctionPointerArg(&(*F), AI) && isDecoratedSPIRVFunc(&(*F)))
- if (!postProcessOCLBuiltinWithFuncPointer(&(*F), AI))
- return false;
- }
- }
- for (auto I = M->begin(), E = M->end(); I != E;) {
- auto F = I++;
- if (F->hasName() && F->isDeclaration()) {
DEBUG(dbgs() << "[postProcessOCL array arg] " << *F << '\n');
if (hasArrayArg(&(*F)) && oclIsBuiltin(F->getName(), &DemangledName, isCPP))
if (!postProcessOCLBuiltinWithArrayArguments(&(*F), DemangledName))
@@ -1107,40 +1110,6 @@ SPIRVToLLVM::postProcessOCLBuiltinReturnStruct(Function *F) { }
bool
-SPIRVToLLVM::postProcessOCLBuiltinWithFuncPointer(Function* F,
- Function::arg_iterator I) {
- auto Name = undecorateSPIRVFunction(F->getName());
- std::set<Value *> InvokeFuncPtrs;
- mutateFunctionOCL (F, [=, &InvokeFuncPtrs](
- CallInst *CI, std::vector<Value *> &Args) {
- auto ALoc = std::find_if(Args.begin(), Args.end(), [](Value * elem) {
- return isFunctionPointerType(elem->getType());
- });
- assert(ALoc != Args.end() && "Buit-in must accept a pointer to function");
- assert(isa<Function>(*ALoc) && "Invalid function pointer usage");
- Value *Ctx = ALoc[1];
- Value *CtxLen = ALoc[2];
- Value *CtxAlign = ALoc[3];
- if (Name == kOCLBuiltinName::EnqueueKernel)
- assert(Args.end() - ALoc > 3);
- else
- assert(Args.end() - ALoc > 0);
- // Erase arguments what are hanled by "spir_block_bind" according to SPIR 2.0
- Args.erase(ALoc + 1, ALoc + 4);
-
- InvokeFuncPtrs.insert(*ALoc);
- // There will be as many calls to spir_block_bind as how much device execution
- // bult-ins using this block. This doesn't contradict SPIR 2.0 specification.
- *ALoc = addBlockBind(M, cast<Function>(removeCast(*ALoc)),
- Ctx, CtxLen, CtxAlign, CI);
- return Name;
- });
- for (auto &I:InvokeFuncPtrs)
- eraseIfNoUse(I);
- return true;
-}
-
-bool
SPIRVToLLVM::postProcessOCLBuiltinWithArrayArguments(Function* F,
const std::string &DemangledName) {
DEBUG(dbgs() << "[postProcessOCLBuiltinWithArrayArguments] " << *F << '\n');
@@ -1482,8 +1451,8 @@ SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, else if (LinkageTy == GlobalValue::CommonLinkage)
// In LLVM variables with common linkage type must be initilized by 0
Initializer = Constant::getNullValue(Ty);
- else if (BVar->getStorageClass() == SPIRVStorageClassKind::StorageClassWorkgroup) - Initializer = dyn_cast<Constant>(UndefValue::get(Ty)); + else if (BVar->getStorageClass() == SPIRVStorageClassKind::StorageClassWorkgroup)
+ Initializer = dyn_cast<Constant>(UndefValue::get(Ty));
SPIRVStorageClassKind BS = BVar->getStorageClass();
if (BS == StorageClassFunction && !Init) {
@@ -1955,7 +1924,13 @@ SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, case OpSignBitSet :
return mapValue(BV,
transOCLRelational(static_cast<SPIRVInstruction *>(BV), BB));
-
+ case OpEnqueueKernel :
+ return mapValue(BV, transEnqueueKernelBI(
+ static_cast<SPIRVInstruction *>(BV), BB));
+ case OpGetKernelWorkGroupSize :
+ case OpGetKernelPreferredWorkGroupSizeMultiple :
+ return mapValue(BV, transWGSizeBI(
+ static_cast<SPIRVInstruction *>(BV), BB));
default: {
auto OC = BV->getOpCode();
if (isSPIRVCmpInstTransToLLVMInst(static_cast<SPIRVInstruction*>(BV))) {
@@ -2129,6 +2104,249 @@ SPIRVToLLVM::transOCLBuiltinPostproc(SPIRVInstruction* BI, return CI;
}
+static void adaptBlockInvoke(Function *Invoke, Type *BlockStructTy) {
+ // As first argument block invoke takes a pointer to captured data.
+ // We pass to block invoke whole block structure, not only captured data
+ // as it expected. So we need to update original function to unpack expected
+ // captured data and use it instead of an original argument
+ //
+ // %block = bitcast i8 addrspace(4)* to <{ ..., [X x i8] }> addrspace(4)*
+ // %block.1 = addrspacecast %block to <{ ..., [X x i8] }>*
+ // %captured = getelementptr <{ ..., [X x i8] }>, i32 0, i32 5
+ // %captured.1 = bitcast %captured to i8*
+
+ BasicBlock *BB = &(Invoke->getEntryBlock());
+ BB->splitBasicBlock(BB->begin(), "invoke");
+ auto FirstArg = &*(Invoke->arg_begin());
+ IRBuilder<> Builder(BB, BB->begin());
+
+ auto FirstArgTy = dyn_cast<PointerType>(FirstArg->getType());
+ assert(FirstArgTy && "Expects that first argument of invoke is a pointer");
+ unsigned FirstArgAS = FirstArgTy->getAddressSpace();
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Invoke->getParent()->getContext(), FirstArgAS);
+ auto BlockStructPtrTy = PointerType::get(BlockStructTy, FirstArgAS);
+
+ auto Int32Ty = Type::getInt32Ty(Invoke->getParent()->getContext());
+ Value *CapturedGEPIndices[2] = { ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, 5) };
+ auto BlockToStructCast =
+ Builder.CreateBitCast(FirstArg, BlockStructPtrTy, "block");
+ auto CapturedGEP = Builder.CreateGEP(BlockToStructCast, CapturedGEPIndices);
+ auto CapturedToInt8Cast = Builder.CreateBitCast(CapturedGEP, Int8PtrTy);
+
+ FirstArg->replaceUsesOutsideBlock(CapturedToInt8Cast, BB);
+}
+
+static Type *getOrCreateBlockDescTy(Module *M) {
+ // Get or create block descriptor type which contains block size
+ // in the last element: %struct.__block_descriptor = type { i64, i64 }
+ auto BlockDescTy = M->getTypeByName("struct.__block_descriptor");
+ if (BlockDescTy)
+ return BlockDescTy;
+
+ auto Int64Ty = Type::getInt64Ty(M->getContext());
+ Type *BlockDescElements[2] = {/*Reserved*/ Int64Ty, /*Block size*/ Int64Ty };
+ return StructType::create(M->getContext(), BlockDescElements,
+ "struct.__block_descriptor");
+}
+
+Value *
+SPIRVToLLVM::transEnqueuedBlock(SPIRVValue *SInvoke, SPIRVValue *SCaptured,
+ SPIRVValue *SCaptSize, SPIRVValue *SCaptAlignment,
+ Function *LBI, BasicBlock *LBB) {
+ // Search if that block have been already translated
+ auto Loc = BlockMap.find(SInvoke);
+ if (Loc != BlockMap.end())
+ return Loc->second;
+
+ IRBuilder<> Builder(LBB);
+ const DataLayout &DL = M->getDataLayout();
+
+ // Translate block and its arguments from SPIRV values to LLVM
+ auto LInvoke = transFunction(static_cast<SPIRVFunction *>(SInvoke));
+ auto LCaptured = transValue(SCaptured, LBI, LBB, false);
+ auto LCaptSize =
+ dyn_cast<ConstantInt>(transValue(SCaptSize, LBI, LBB, false));
+ auto LCaptAlignment =
+ dyn_cast<ConstantInt>(transValue(SCaptAlignment, LBI, LBB, false));
+
+ // Create basic types
+ auto Int8Ty = Type::getInt8Ty(*Context);
+ auto Int32Ty = Type::getInt32Ty(*Context);
+ auto Int8PtrTy = Type::getInt8PtrTy(*Context, SPIRAS_Private);
+ auto Int8PtrTyGen = Type::getInt8PtrTy(*Context, SPIRAS_Generic);
+ auto BlockDescTy = getOrCreateBlockDescTy(M);
+ auto BlockDescPtrTy = BlockDescTy->getPointerTo(SPIRAS_Private);
+
+ // Create a block as structure:
+ // <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>
+ SmallVector<Type *, 8> BlockEls =
+ { /*isa*/ Int8PtrTy, /*flags*/ Int32Ty, /*reserved*/ Int32Ty,
+ /*invoke*/ Int8PtrTy, /*block_descriptor*/ BlockDescPtrTy };
+
+ // Add captured if any
+ // <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [X x i8] }>
+ // Note: captured data stored in structure as array of char
+ if (LCaptSize->getZExtValue() > 0)
+ BlockEls.push_back(ArrayType::get(Int8Ty, LCaptSize->getZExtValue()));
+
+ auto BlockTy = StructType::get(*Context, BlockEls, /*isPacked*/ true);
+
+ // Allocate block on the stack, then store data to it
+ auto BlockAlloca = Builder.CreateAlloca(BlockTy, nullptr, "block");
+ BlockAlloca->setAlignment(DL.getPrefTypeAlignment(BlockTy));
+
+ auto getIndices = [Int32Ty](int a, int b) -> SmallVector<Value *, 2> {
+ return { ConstantInt::get(Int32Ty, a), ConstantInt::get(Int32Ty, b) };
+ };
+
+ // 1. isa, flags and reserved fields isn't used in current implementation
+ // Fill them the same way as clang does
+ auto IsaGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 0));
+ Builder.CreateStore(ConstantPointerNull::get(Int8PtrTy), IsaGEP);
+ auto FlagsGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 1));
+ Builder.CreateStore(ConstantInt::get(Int32Ty, 1342177280), FlagsGEP);
+ auto ReservedGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 2));
+ Builder.CreateStore(ConstantInt::get(Int32Ty, 0), ReservedGEP);
+
+ // 2. Store pointer to block invoke to the structure
+ auto InvokeCast = Builder.CreateBitCast(LInvoke, Int8PtrTy, "invoke");
+ auto InvokeGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 3));
+ Builder.CreateStore(InvokeCast, InvokeGEP);
+
+ // 3. Create and store a pointer to the block descriptor global value
+ uint64_t SizeOfBlock = DL.getTypeAllocSize(BlockTy);
+
+ auto Int64Ty = Type::getInt64Ty(*Context);
+ Constant *BlockDescEls[2] = { ConstantInt::get(Int64Ty, 0),
+ ConstantInt::get(Int64Ty, SizeOfBlock) };
+ auto BlockDesc =
+ ConstantStruct::get(dyn_cast<StructType>(BlockDescTy), BlockDescEls);
+
+ auto BlockDescGV =
+ new GlobalVariable(*M, BlockDescTy, true, GlobalValue::InternalLinkage,
+ BlockDesc, "__block_descriptor_spirv");
+ auto BlockDescGEP =
+ Builder.CreateGEP(BlockAlloca, getIndices(0, 4), "block.descriptor");
+ Builder.CreateStore(BlockDescGV, BlockDescGEP);
+
+ // 4. Copy captured data to the structure
+ if (LCaptSize->getZExtValue() > 0) {
+ auto CapturedGEP =
+ Builder.CreateGEP(BlockAlloca, getIndices(0, 5), "block.captured");
+ auto CapturedGEPCast = Builder.CreateBitCast(CapturedGEP, Int8PtrTy);
+
+ // We can't make any guesses about type of captured data, so
+ // let's copy it through memcpy
+ std::string MemCpyName = "llvm.memcpy.p0i8.p0i8";
+ MemCpyName += (LCaptSize->getType()->getBitWidth() == 32) ? ".i32" : ".i64";
+ SmallVector<Type*, 8> MemCpyArgTys = {
+ Int8PtrTy, Int8PtrTy, // src and dst pointers
+ LCaptSize->getType(), // size
+ Int32Ty, // alignment
+ Type::getInt1Ty(*Context) // isVolatile
+ };
+
+ FunctionType *MemCpyTy =
+ FunctionType::get(Type::getVoidTy(*Context), MemCpyArgTys, false);
+ Function *MemCpy =
+ dyn_cast<Function>(M->getOrInsertFunction(MemCpyName, MemCpyTy));
+ assert(MemCpy && "Can't create memcpy intrinsic");
+ MemCpy->setLinkage(GlobalValue::ExternalLinkage);
+ SmallVector<Value*, 8> MemCpyArgs =
+ { CapturedGEPCast, LCaptured, LCaptSize, LCaptAlignment,
+ ConstantInt::get(Type::getInt1Ty(*Context), SCaptured->isVolatile()) };
+ Builder.CreateCall(MemCpy, MemCpyArgs);
+
+ // Fix invoke function to correctly process its first argument
+ adaptBlockInvoke(LInvoke, BlockTy);
+ }
+ auto BlockCast = Builder.CreateBitCast(BlockAlloca, Int8PtrTy);
+ auto BlockCastGen = Builder.CreateAddrSpaceCast(BlockCast, Int8PtrTyGen);
+ BlockMap[SInvoke] = BlockCastGen;
+ return BlockCastGen;
+}
+
+Instruction *
+SPIRVToLLVM::transEnqueueKernelBI(SPIRVInstruction *BI, BasicBlock *BB) {
+ Type *IntTy = Type::getInt32Ty(*Context);
+
+ // Find or create enqueue kernel BI declaration
+ auto Ops = BI->getOperands();
+ bool hasVaargs = Ops.size() > 10;
+
+ std::string FName = hasVaargs ? "__enqueue_kernel_events_vaargs"
+ : "__enqueue_kernel_basic_events";
+ Function* F = M->getFunction(FName);
+ if (!F) {
+ Type *EventTy = PointerType::get(
+ getOrCreateOpaquePtrType(M, SPIR_TYPE_NAME_CLK_EVENT_T, SPIRAS_Private),
+ SPIRAS_Generic);
+
+ SmallVector<Type *, 8> Tys = {
+ transType(Ops[0]->getType()), // queue
+ IntTy, // flags
+ transType(Ops[2]->getType()), // ndrange
+ IntTy, EventTy, EventTy, // events
+ Type::getInt8PtrTy(*Context, SPIRAS_Generic) // block
+ };
+ if (hasVaargs)
+ Tys.push_back(IntTy); // Number of variadics if any
+
+ FunctionType* FT = FunctionType::get(IntTy, Tys, hasVaargs);
+ F = Function::Create(FT, GlobalValue::ExternalLinkage, FName, M);
+ if (isFuncNoUnwind())
+ F->addFnAttr(Attribute::NoUnwind);
+ }
+
+ // Create call to enqueue kernel BI
+ SmallVector<Value *, 8> Args = {
+ transValue(Ops[0], F, BB, false), // queue
+ transValue(Ops[1], F, BB, false), // flags
+ transValue(Ops[2], F, BB, false), // ndrange
+ transValue(Ops[3], F, BB, false), // events number
+ transDeviceEvent(Ops[4], F, BB), // event_wait_list
+ transDeviceEvent(Ops[5], F, BB), // event_ret
+ transEnqueuedBlock(Ops[6], Ops[7], Ops[8], Ops[9], F, BB) // block
+ };
+
+ if (hasVaargs) {
+ Args.push_back(ConstantInt::get(IntTy, Ops.size() - 10)); // Number of vaargs
+ for (unsigned i = 10; i < Ops.size(); ++i)
+ Args.push_back(transValue(Ops[i], F, BB, false));
+ }
+ auto Call = CallInst::Create(F, Args, "", BB);
+ setName(Call, BI);
+ setAttrByCalledFunc(Call);
+ return Call;
+}
+
+Instruction *
+SPIRVToLLVM::transWGSizeBI(SPIRVInstruction *BI, BasicBlock *BB) {
+ std::string FName =
+ (BI->getOpCode() == OpGetKernelWorkGroupSize)
+ ? "__get_kernel_work_group_size_impl"
+ : "__get_kernel_preferred_work_group_multiple_impl";
+
+ Function* F = M->getFunction(FName);
+ if (!F) {
+ auto Int8PtrTyGen = Type::getInt8PtrTy(*Context, SPIRAS_Generic);
+ FunctionType* FT =
+ FunctionType::get(Type::getInt32Ty(*Context), Int8PtrTyGen, false);
+ F = Function::Create(FT, GlobalValue::ExternalLinkage, FName, M);
+ if (isFuncNoUnwind())
+ F->addFnAttr(Attribute::NoUnwind);
+ }
+ auto Ops = BI->getOperands();
+ auto Block = transEnqueuedBlock(Ops[0], Ops[1], Ops[2], Ops[3], F, BB);
+ auto Call = CallInst::Create(F, Block, "", BB);
+ setName(Call, BI);
+ setAttrByCalledFunc(Call);
+ return Call;
+}
+
Instruction *
SPIRVToLLVM::transBuiltinFromInst(const std::string& FuncName,
SPIRVInstruction* BI, BasicBlock* BB) {
@@ -2378,19 +2596,15 @@ SPIRVToLLVM::transNonTemporalMetadata(Instruction *I) { bool
SPIRVToLLVM::transKernelMetadata() {
- NamedMDNode *KernelMDs = M->getOrInsertNamedMetadata(SPIR_MD_KERNELS);
for (unsigned I = 0, E = BM->getNumFunctions(); I != E; ++I) {
SPIRVFunction *BF = BM->getFunction(I);
Function *F = static_cast<Function *>(getTranslatedValue(BF));
assert(F && "Invalid translated function");
if (F->getCallingConv() != CallingConv::SPIR_KERNEL)
continue;
- std::vector<llvm::Metadata*> KernelMD;
- KernelMD.push_back(ValueAsMetadata::get(F));
// Generate metadata for kernel_arg_address_spaces
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_ADDR_SPACE, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_ADDR_SPACE, BF, F,
[=](SPIRVFunctionParameter *Arg){
SPIRVType *ArgTy = Arg->getType();
SPIRAddressSpace AS = SPIRAS_Private;
@@ -2402,9 +2616,8 @@ SPIRVToLLVM::transKernelMetadata() { ConstantInt::get(Type::getInt32Ty(*Context), AS));
});
// Generate metadata for kernel_arg_access_qual
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_ACCESS_QUAL, BF,
- [=](SPIRVFunctionParameter *Arg){
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_ACCESS_QUAL, BF, F,
+ [=](SPIRVFunctionParameter *Arg){
std::string Qual;
auto T = Arg->getType();
if (T->isTypeOCLImage()) {
@@ -2418,14 +2631,12 @@ SPIRVToLLVM::transKernelMetadata() { return MDString::get(*Context, Qual);
});
// Generate metadata for kernel_arg_type
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_TYPE, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_TYPE, BF, F,
[=](SPIRVFunctionParameter *Arg){
return transOCLKernelArgTypeName(Arg);
});
// Generate metadata for kernel_arg_type_qual
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_TYPE_QUAL, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_TYPE_QUAL, BF, F,
[=](SPIRVFunctionParameter *Arg){
std::string Qual;
if (Arg->hasDecorate(DecorationVolatile))
@@ -2451,8 +2662,7 @@ SPIRVToLLVM::transKernelMetadata() { return MDString::get(*Context, Qual);
});
// Generate metadata for kernel_arg_base_type
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_BASE_TYPE, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_BASE_TYPE, BF, F,
[=](SPIRVFunctionParameter *Arg){
return transOCLKernelArgTypeName(Arg);
});
@@ -2463,37 +2673,32 @@ SPIRVToLLVM::transKernelMetadata() { ArgHasName &= !Arg->getName().empty();
});
if (ArgHasName)
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_NAME, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_NAME, BF, F,
[=](SPIRVFunctionParameter *Arg){
return MDString::get(*Context, Arg->getName());
});
}
// Generate metadata for reqd_work_group_size
if (auto EM = BF->getExecutionMode(ExecutionModeLocalSize)) {
- KernelMD.push_back(getMDNodeStringIntVec(Context,
- kSPIR2MD::WGSize, EM->getLiterals()));
+ F->setMetadata(kSPIR2MD::WGSize,
+ getMDNodeStringIntVec(Context, EM->getLiterals()));
}
// Generate metadata for work_group_size_hint
if (auto EM = BF->getExecutionMode(ExecutionModeLocalSizeHint)) {
- KernelMD.push_back(getMDNodeStringIntVec(Context,
- kSPIR2MD::WGSizeHint, EM->getLiterals()));
+ F->setMetadata(kSPIR2MD::WGSizeHint,
+ getMDNodeStringIntVec(Context, EM->getLiterals()));
}
// Generate metadata for vec_type_hint
if (auto EM = BF->getExecutionMode(ExecutionModeVecTypeHint)) {
std::vector<Metadata*> MetadataVec;
- MetadataVec.push_back(MDString::get(*Context, kSPIR2MD::VecTyHint));
Type *VecHintTy = decodeVecTypeHint(*Context, EM->getLiterals()[0]);
assert(VecHintTy);
MetadataVec.push_back(ValueAsMetadata::get(UndefValue::get(VecHintTy)));
MetadataVec.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(*Context),
1)));
- KernelMD.push_back(MDNode::get(*Context, MetadataVec));
+ F->setMetadata(kSPIR2MD::VecTyHint, MDNode::get(*Context, MetadataVec));
}
-
- llvm::MDNode *Node = MDNode::get(*Context, KernelMD);
- KernelMDs->addOperand(Node);
}
return true;
}
diff --git a/test/ExecutionMode_SPIR_to_SPIRV.ll b/test/ExecutionMode_SPIR_to_SPIRV.ll index b8b2bb8..960b7f5 100644 --- a/test/ExecutionMode_SPIR_to_SPIRV.ll +++ b/test/ExecutionMode_SPIR_to_SPIRV.ll @@ -15,11 +15,9 @@ ; LLVM => SPIRV => LLVM checks
; CHECK-LLVM: define spir_kernel void @worker()
-; CHECK-LLVM: !opencl.kernels = !{![[KERNEL_MD:[0-9]+]]}
-
-; CHECK-LLVM-DAG: ![[EXEC_MODE_MD:[0-9]+]] = !{!"work_group_size_hint", i32 128, i32 10, i32 1}
-; CHECK-LLVM-DAG: ![[KERNEL_MD]] = !{void ()* @worker, {{.*}}![[EXEC_MODE_MD]]{{.*}}}
+; CHECK-LLVM-SAME: !work_group_size_hint [[WG:![0-9]+]]
+; CHECK-LLVM: [[WG]] = !{i32 128, i32 10, i32 1}
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir-unknown-unknown"
diff --git a/test/image_without_access_qualifier.spt b/test/image_without_access_qualifier.spt index fe88e76..98c9dbf 100644 --- a/test/image_without_access_qualifier.spt +++ b/test/image_without_access_qualifier.spt @@ -1,13 +1,13 @@ -119734787 65536 393230 34 0 -2 Capability Addresses -2 Capability Linkage -2 Capability Kernel -2 Capability Int64 -2 Capability ImageBasic +119734787 65536 393230 34 0 +2 Capability Addresses +2 Capability Linkage +2 Capability Kernel +2 Capability Int64 +2 Capability ImageBasic 5 ExtInstImport 1 "OpenCL.std" -3 MemoryModel 2 2 +3 MemoryModel 2 2 6 EntryPoint 6 13 "image_test" -3 Source 3 200000 +3 Source 3 200000 11 Name 5 "__spirv_BuiltInGlobalInvocationId" 4 Name 14 "srcImg" 4 Name 15 "samp" @@ -22,51 +22,51 @@ 7 Name 29 "TempSampledImage" 5 Name 30 "call4.old" 5 Name 33 "arrayidx" -4 Decorate 5 BuiltIn 28 -3 Decorate 5 Constant -13 Decorate 5 LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import -4 TypeInt 2 64 0 -4 TypeInt 9 32 0 -4 TypeVector 3 2 3 -4 TypePointer 4 0 3 -2 TypeVoid 6 -9 TypeImage 7 6 1 0 0 0 0 0 -2 TypeSampler 8 -4 TypeVector 10 9 4 -4 TypePointer 11 5 10 -6 TypeFunction 12 6 7 8 11 -4 TypeVector 23 9 2 -3 TypeSampledImage 28 7 -3 TypeFloat 31 32 -4 Variable 4 5 0 -3 Undef 23 24 -4 Constant 31 32 0 +4 Decorate 5 BuiltIn 28 +3 Decorate 5 Constant +13 Decorate 5 LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import +4 TypeInt 2 64 0 +4 TypeInt 9 32 0 +4 TypeVector 3 2 3 +4 TypePointer 4 0 3 +2 TypeVoid 6 +9 TypeImage 7 6 1 0 0 0 0 0 +2 TypeSampler 8 +4 TypeVector 10 9 4 +4 TypePointer 11 5 10 +6 TypeFunction 12 6 7 8 11 +4 TypeVector 23 9 2 +3 TypeSampledImage 28 7 +3 TypeFloat 31 32 +4 Variable 4 5 0 +3 Undef 23 24 +4 Constant 31 32 0 -5 Function 6 13 0 12 -3 FunctionParameter 7 14 -3 FunctionParameter 8 15 -3 FunctionParameter 11 16 +5 Function 6 13 0 12 +3 FunctionParameter 7 14 +3 FunctionParameter 8 15 +3 FunctionParameter 11 16 -2 Label 17 -4 Load 3 18 5 -5 CompositeExtract 2 19 18 0 -4 Load 3 20 5 -5 CompositeExtract 2 21 20 1 -4 UConvert 9 22 19 -6 CompositeInsert 23 25 22 24 0 -4 UConvert 9 26 21 -6 CompositeInsert 23 27 26 25 1 -5 SampledImage 28 29 14 15 -7 ImageSampleExplicitLod 10 30 29 27 2 32 -5 InBoundsPtrAccessChain 11 33 16 19 -5 Store 33 30 2 16 -1 Return +2 Label 17 +4 Load 3 18 5 +5 CompositeExtract 2 19 18 0 +4 Load 3 20 5 +5 CompositeExtract 2 21 20 1 +4 UConvert 9 22 19 +6 CompositeInsert 23 25 22 24 0 +4 UConvert 9 26 21 +6 CompositeInsert 23 27 26 25 1 +5 SampledImage 28 29 14 15 +7 ImageSampleExplicitLod 10 30 29 27 2 32 +5 InBoundsPtrAccessChain 11 33 16 19 +5 Store 33 30 2 16 +1 Return -1 FunctionEnd +1 FunctionEnd ; RUN: llvm-spirv %s -to-binary -o %t.spv ; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM ; CHECK-LLVM: %opencl.image2d_t = type opaque -; CHECK-LLVM: !{{[0-9]*}} = !{!"kernel_arg_access_qual", !"read_only", !"none", !"none"} +; CHECK-LLVM: !{{[0-9]*}} = !{!"read_only", !"none", !"none"} diff --git a/test/transcoding/check_ro_qualifier.ll b/test/transcoding/check_ro_qualifier.ll index 8096e8c..1092f15 100644 --- a/test/transcoding/check_ro_qualifier.ll +++ b/test/transcoding/check_ro_qualifier.ll @@ -5,13 +5,18 @@ ; CHECK-LLVM: opencl.image2d_array_ro_t = type opaque
; CHECK-LLVM: define spir_kernel void @sample_kernel(%opencl.image2d_array_ro_t addrspace(1)
+; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]]
+
; CHECK-LLVM: call spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1)
; CHECK-LLVM: call spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1)
; CHECK-LLVM: declare spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1)
; CHECK-LLVM: declare spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1)
-; CHECK-LLVM: !{{[0-9]+}} = !{void (%opencl.image2d_array_ro_t
-; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_type", !"image2d_array_ro_t"}
-; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_base_type", !"image2d_array_ro_t"}
+
+; CHECK-LLVM-DAG: [[AQ]] = !{!"read_only"}
+; CHECK-LLVM-DAG: [[TYPE]] = !{!"image2d_array_ro_t"}
+; CHECK-LLVM-DAG: [[BT]] = !{!"image2d_array_ro_t"}
; ModuleID = 'out.ll'
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
@@ -58,4 +63,4 @@ attributes #0 = { nounwind } !7 = !{i32 1, i32 2}
!8 = !{}
!9 = !{!"cl_images"}
-!10 = !{i16 6, i16 14}
\ No newline at end of file +!10 = !{i16 6, i16 14}
diff --git a/test/transcoding/check_wo_qualifier.ll b/test/transcoding/check_wo_qualifier.ll index 1f85d43..217070e 100644 --- a/test/transcoding/check_wo_qualifier.ll +++ b/test/transcoding/check_wo_qualifier.ll @@ -5,14 +5,17 @@ ; CHECK-LLVM: opencl.image2d_array_wo_t = type opaque
; CHECK-LLVM: define spir_kernel void @sample_kernel(%opencl.image2d_array_wo_t addrspace(1)
+; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]]
+
; CHECK-LLVM: call spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)
; CHECK-LLVM: call spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)
; CHECK-LLVM: declare spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)
; CHECK-LLVM: declare spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)
-; CHECK-LLVM: !{{[0-9]+}} = !{void (%opencl.image2d_array_wo_t
-; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_type", !"image2d_array_wo_t"}
-; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_base_type", !"image2d_array_wo_t"}
-
+; CHECK-LLVM-DAG: [[AQ]] = !{!"write_only"}
+; CHECK-LLVM-DAG: [[TYPE]] = !{!"image2d_array_wo_t"}
+; CHECK-LLVM-DAG: [[BT]] = !{!"image2d_array_wo_t"}
; ModuleID = 'out.ll'
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
@@ -59,4 +62,4 @@ attributes #0 = { nounwind } !7 = !{i32 1, i32 2}
!8 = !{}
!9 = !{!"cl_images"}
-!10 = !{i16 6, i16 14}
\ No newline at end of file +!10 = !{i16 6, i16 14}
diff --git a/test/transcoding/cl-types.ll b/test/transcoding/cl-types.ll index 6c5a5bd..b466560 100644 --- a/test/transcoding/cl-types.ll +++ b/test/transcoding/cl-types.ll @@ -72,17 +72,22 @@ target triple = "spir-unknown-unknown" ; CHECK-SPIRV: 3 FunctionParameter [[IMG2D_RW]] {{[0-9]+}} ; CHECK-SPIRV: 3 FunctionParameter [[SAMP]] [[SAMP_ARG:[0-9]+]] -; CHECK-LLVM: define spir_kernel void @foo( -; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %a, -; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %b, -; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c1, -; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d1, -; CHECK-LLVM: %opencl.image3d_t addrspace(1)* nocapture %e1, -; CHECK-LLVM: %opencl.image2d_array_t addrspace(1)* nocapture %f1, -; CHECK-LLVM: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1, -; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c2, -; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d3, -; CHECK-LLVM: i32 %s) +; CHECK-LLVM: define spir_kernel void @foo( +; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %a, +; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %b, +; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c1, +; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d1, +; CHECK-LLVM-SAME: %opencl.image3d_t addrspace(1)* nocapture %e1, +; CHECK-LLVM-SAME: %opencl.image2d_array_t addrspace(1)* nocapture %f1, +; CHECK-LLVM-SAME: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1, +; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c2, +; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d3, +; CHECK-LLVM-SAME: i32 %s) +; CHECK-LLVM-SAME: !kernel_arg_addr_space [[AS:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type_qual [[TQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]] ; Function Attrs: nounwind readnone define spir_kernel void @foo( @@ -122,11 +127,11 @@ attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-point !opencl.compiler.options = !{!8} !llvm.ident = !{!10} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_access_qual", !"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write", !"none"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_base_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type_qual", !"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !"", !""} +; CHECK-LLVM-DAG: [[AS]] = !{i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0} +; CHECK-LLVM-DAG: [[AQ]] = !{!"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write", !"none"} +; CHECK-LLVM-DAG: [[TYPE]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"} +; CHECK-LLVM-DAG: [[BT]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"} +; CHECK-LLVM-DAG: [[TQ]] = !{!"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !"", !""} !0 = !{void (%opencl.pipe_t addrspace(1)*, %opencl.pipe_t addrspace(1)*, %opencl.image1d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, %opencl.image2d_array_t addrspace(1)*, %opencl.image1d_buffer_t addrspace(1)*, %opencl.image1d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, i32)* @foo, !1, !2, !3, !4, !5} !1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0} diff --git a/test/transcoding/device_execution.ll b/test/transcoding/device_execution.ll index f947db3..538a1b2 100644 --- a/test/transcoding/device_execution.ll +++ b/test/transcoding/device_execution.ll @@ -24,8 +24,9 @@ ; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s -; CHECK-NOT: @_Z14enqueue_kernel{{.*}}{{S_|S0_|S1_}} -; CHECK: @_Z14enqueue_kernel9ocl_queue{{22kernel_enqueue_flags_t|i}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE +; CHECK-NOT: @_Z14enqueue_kernel9ocl_queue{{22kernel_enqueue_flags_t|i}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE +; CHECK-NOT: @spir_block_bind +; CHECK: call i32 @__enqueue_kernel_basic_events declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*) #1 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/test/transcoding/device_execution_multiple_blocks.ll b/test/transcoding/device_execution_multiple_blocks.ll index 69f3707..ac06106 100644 --- a/test/transcoding/device_execution_multiple_blocks.ll +++ b/test/transcoding/device_execution_multiple_blocks.ll @@ -68,30 +68,40 @@ entry: ; Function Attrs: nounwind define spir_kernel void @enqueue_block_get_kernel_preferred_work_group_size_multiple(i32 addrspace(1)* %res) #0 { +; CHECK: [[CTX:%.*]] = bitcast %0* %captured to i8* +; CHECK: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, align 8 +; CHECK: store i8* {{.*}} @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke +; CHECK: [[CAPTUREDGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK]], i32 0, i32 5 +; CHECK: [[CAPTUREDCAST:%.*]] = bitcast [8 x i8]* [[CAPTUREDGEP]] to i8* +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[CAPTUREDCAST]], i8* [[CTX]], i32 8, i32 8, i1 false) +; CHECK: [[BLOCKBCST:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK]] to i8* +; CHECK: [[BLOCKADDRCST:%.*]] = addrspacecast i8* [[BLOCKBCST]] to i8 addrspace(4)* +; CHECK: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* [[BLOCKADDRCST]]) +; CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* [[BLOCKADDRCST]]) +; CHECK: call i32 @__enqueue_kernel_basic_events({{.*}} i8 addrspace(4)* [[BLOCKADDRCST]]) + +; CHECK: [[BLOCK2:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, align 8 +; CHECK: store i8* {{.*}} @kernelBlockNoCtx_block_invoke +; CHECK: [[BLOCKBCST2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK2]] to i8* +; CHECK: [[BLOCKADDRCST2:%.*]] = addrspacecast i8* [[BLOCKBCST2]] to i8 addrspace(4)* +; CHECK-NOT: call void @llvm.memcpy +; CHECK: call i32 @__enqueue_kernel_basic_events({{.*}} i8 addrspace(4)* [[BLOCKADDRCST2]]) + entry: %captured = alloca <{ i32 addrspace(1)* }>, align 8 %ndrange = alloca %struct.ndrange_t, align 8 %block.captured = getelementptr inbounds <{ i32 addrspace(1)* }>, <{ i32 addrspace(1)* }>* %captured, i64 0, i32 0 store i32 addrspace(1)* %res, i32 addrspace(1)** %block.captured, align 8 %0 = bitcast <{ i32 addrspace(1)* }>* %captured to i8* -; CHECK: [[CTX:.*]] = bitcast %0* %captured to i8* %1 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke to i8*), i32 8, i32 8, i8* %0) #2 -; CHECK: [[BLOCK0:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]]) -; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*[[BLOCK0]]) %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block* %1) #2 -; CHECK: [[BLOCK1:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]]) -; CHECK: call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*[[BLOCK1]]) %call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block* %1) #2 %div = udiv i32 %call, %call1 %call2 = call spir_func %opencl.queue_t* @get_default_queue() #2 %conv = zext i32 %div to i64 %conv3 = zext i32 %call to i64 call spir_func void @_Z10ndrange_1Dmm(%struct.ndrange_t* sret %ndrange, i64 %conv, i64 %conv3) #2 -; CHECK: [[BLOCK2:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]]) -; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK2]]) %call4 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %1) #2 -; CHECK: [[BLOCK3:.*]] = call {{.*}} @spir_block_bind({{.*}}@kernelBlockNoCtx_block_invoke{{.*}}, i32 0, i32 0, i8* null) -; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK3]]) %2 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @kernelBlockNoCtx_block_invoke to i8*), i32 0, i32 0, i8* null) #2 %call5 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %2) #2 ret void diff --git a/test/transcoding/device_execution_overloading.ll b/test/transcoding/device_execution_overloading.ll index b719bca..e5ee309 100644 --- a/test/transcoding/device_execution_overloading.ll +++ b/test/transcoding/device_execution_overloading.ll @@ -70,13 +70,13 @@ entry: ret void } -; CHECK: @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE -; CHECK: @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE -; CHECK: @_Z14enqueue_kernel9ocl_queue{{.*}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz +; CHECK: @__get_kernel_work_group_size_impl +; CHECK: @__get_kernel_preferred_work_group_multiple_impl +; CHECK: @__enqueue_kernel_events_vaargs -; CHECK: @_Z26get_kernel_work_group_sizeU13block_pointerFvvE -; CHECK: @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE -; CHECK: @_Z14enqueue_kernel9ocl_queue{{.*}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE +; CHECK: @__get_kernel_work_group_size_impl +; CHECK: @__get_kernel_preferred_work_group_multiple_impl +; CHECK: @__enqueue_kernel_basic_events ; Function Attrs: nounwind define spir_kernel void @host_kernel(i32 %size, float addrspace(1)* %ptr) #0 { diff --git a/test/transcoding/device_execution_simple_local_memory.ll b/test/transcoding/device_execution_simple_local_memory.ll index cc615e5..51601eb 100644 --- a/test/transcoding/device_execution_simple_local_memory.ll +++ b/test/transcoding/device_execution_simple_local_memory.ll @@ -18,8 +18,8 @@ ;; bash$ export PATH_TO_INCLUDE= $PATH_TO_GEN/lib/clang/3.6.1/include ;; bash$ $PATH_TO_GEN/bin/clang -cc1 -x cl -cl-std=CL2.0 -triple spir64-unknonw-unknown -emit-llvm -include opencl-20.h repro.cl -o device_execution.ll -;; 1. Check mangling of device execution built-ins for blocks with local memory arguments -;; 2. Check there is an enqueue_kernel with ellipsis +;; Check that device enqueue BIs wasn't mangled + ; RUN: llvm-as %s -o %t.bc ; RUN: llvm-spirv %t.bc -o %t.spv @@ -61,11 +61,11 @@ entry: %0 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i32 0, i32 0, i8* null) store %opencl.block* %0, %opencl.block** %block, align 8 %1 = load %opencl.block*, %opencl.block** %block, align 8 -; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE +; CHECK: call {{.*}} @__get_kernel_work_group_size_impl %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block* %1) store i32 %call, i32* %wgSize, align 4 %2 = load %opencl.block*, %opencl.block** %block, align 8 -; CHECK: call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE +; CHECK: call {{.*}} @__get_kernel_preferred_work_group_multiple_impl %call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block* %2) store i32 %call1, i32* %prefMul, align 4 %call2 = call spir_func %opencl.queue_t* @_Z17get_default_queuev() @@ -75,7 +75,7 @@ entry: %5 = load i32, i32* %wgSize, align 4 %6 = load i32, i32* %prefMul, align 4 %mul = mul i32 %5, %6 -; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block* {{.*}}, i32 {{.*}}, i32 {{.*}}) +; CHECK: call {{.*}} @__enqueue_kernel_events_vaargs({{.*}}, i32 {{.*}}, i32 {{.*}}) %call3 = call spir_func i32 (%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %agg.tmp, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %3, i32 %4, i32 %mul) ret void } @@ -108,7 +108,7 @@ declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block*) #1 -; CHECK: declare {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block*, i32, ...) +; CHECK: declare {{.*}} @__enqueue_kernel_events_vaargs{{.*}}({{.*}}, i32, ...) declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) #1 declare spir_func %opencl.queue_t* @_Z17get_default_queuev() #1 diff --git a/test/transcoding/device_execution_vaargs.ll b/test/transcoding/device_execution_vaargs.ll new file mode 100644 index 0000000..5c307e6 --- /dev/null +++ b/test/transcoding/device_execution_vaargs.ll @@ -0,0 +1,231 @@ +;; bash$ cat device_execution_overloading.cl +;; void device_kernel_with_local_args(__local float* ptr0, __local float* ptr1) { +;; *ptr0 = 0; +;; *ptr1 = 1; +;; } +;; +;; void device_kernel(__global float* ptr) { +;; *ptr = 3; +;; } +;; +;; __kernel void host_kernel(uint size, __global float* ptr) { +;; void(^block_with_local)(__local void*, __local void*) = ^(__local void* ptr0, __local void* ptr1){ +;; device_kernel_with_local_args(ptr0, ptr1); +;; }; +;; +;; void(^block)(void) = ^{ +;; device_kernel(ptr); +;; }; +;; +;n; uint wgSize = get_kernel_work_group_size(block_with_local); +;; uint prefMul = get_kernel_preferred_work_group_size_multiple(block_with_local); +;; enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1), +;; 0, NULL, NULL, block_with_local, size, wgSize * prefMul); +;; +;; wgSize = get_kernel_work_group_size(block); +;; prefMul = get_kernel_preferred_work_group_size_multiple(block); +;; enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1), +;; 0, NULL, NULL, block); +;; } +;; bash$ +;;$PATH_TO_GEN/bin/clang -cc1 -x cl -O0 -cl-std=CL2.0 -triple spir64-unknonw-unknown -include $PATH_TO_GEN/lib/clang/3.6.1/include/opencl-20.h -emit-llvm device_execution_overloading.cl -o device_execution_overloading.ll + +;; Test enqueue_kernel with and wthout vaargs + +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: llvm-spirv -r %t.spv -o %t.bc +; RUN: llvm-dis < %t.bc | FileCheck %s + +; CHECK: %struct.__block_descriptor = type { i64, i64 } + +; CHECK: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, align 8 +; CHECK: [[INVOKEGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], i32 0, i32 3 +; CHECK: store i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i8** [[INVOKEGEP]] +; CHECK: [[BLOCKDESCGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], i32 0, i32 4 +; CHECK: store %struct.__block_descriptor* @__block_descriptor_spirv, %struct.__block_descriptor** [[BLOCKDESCGEP]] +; CHECK: [[BLOCKBCAST:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]] to i8* +; CHECK: [[BLOCKADDRCAST:%.*]] = addrspacecast i8* [[BLOCKBCAST]] to i8 addrspace(4)* +; CHECK: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* [[BLOCKADDRCAST]]) +; CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* [[BLOCKADDRCAST]]) +; CHECK: call i32 {{.*}} @__enqueue_kernel_events_vaargs + +; CHECK: [[BLOCK2:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, align 8 +; CHECK: [[INVOKEGEP2:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]], i32 0, i32 3 +; CHECK: store i8* bitcast (void (i8*)* @__host_kernel_block_invoke_2 to i8*), i8** [[INVOKEGEP2]] +; CHECK: [[BLOCKDESCGEP2:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]], i32 0, i32 4 +; CHECK: store %struct.__block_descriptor* @__block_descriptor_spirv.1, %struct.__block_descriptor** [[BLOCKDESCGEP2]] +; CHECK: [[CAPTUREDGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]], i32 0, i32 5 +; CHECK: [[CAPTUREDCAST:%.*]] = bitcast [8 x i8]* [[CAPTUREDGEP]] to i8* +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[CAPTUREDCAST]], i8* %0, i32 8, i32 8, i1 false) +; CHECK: [[BLOCKBCAST2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]] to i8* +; CHECK: [[BLOCKADDRCAST2:%.*]] = addrspacecast i8* [[BLOCKBCAST2]] to i8 addrspace(4)* +; CHECK: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* [[BLOCKADDRCAST2]]) +; CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* [[BLOCKADDRCAST2]]) #0 +; CHECK: call i32 @__enqueue_kernel_basic_events + +; CHECK: define internal spir_func void @__host_kernel_block_invoke( +; CHECK-NOT: bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [0 x i8] }>* + +; CHECK: define internal spir_func void @__host_kernel_block_invoke_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INV_BLOCK:%.*]] = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* +; CHECK-NEXT: [[INV_CAPTUREDGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[INV_BLOCK]], i32 0, i32 5 +; CHECK-NEXT: bitcast [8 x i8]* [[INV_CAPTUREDGEP]] to i8* +; CHECK-NEXT: br label %invoke + +; ModuleID = 'device_execution_overloading.cl' +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknonw-unknown" + +%opencl.block = type opaque +%struct.ndrange_t = type { i32, [3 x i64], [3 x i64], [3 x i64] } +%opencl.queue_t = type opaque +%opencl.clk_event_t = type opaque + +; Function Attrs: nounwind +define spir_func void @device_kernel_with_local_args(float addrspace(3)* %ptr0, float addrspace(3)* %ptr1) #0 { +entry: + %ptr0.addr = alloca float addrspace(3)*, align 8 + %ptr1.addr = alloca float addrspace(3)*, align 8 + store float addrspace(3)* %ptr0, float addrspace(3)** %ptr0.addr, align 8 + store float addrspace(3)* %ptr1, float addrspace(3)** %ptr1.addr, align 8 + %0 = load float addrspace(3)*, float addrspace(3)** %ptr0.addr, align 8 + store float 0.000000e+00, float addrspace(3)* %0, align 4 + %1 = load float addrspace(3)*, float addrspace(3)** %ptr1.addr, align 8 + store float 1.000000e+00, float addrspace(3)* %1, align 4 + ret void +} + +; Function Attrs: nounwind +define spir_func void @device_kernel(float addrspace(1)* %ptr) #0 { +entry: + %ptr.addr = alloca float addrspace(1)*, align 8 + store float addrspace(1)* %ptr, float addrspace(1)** %ptr.addr, align 8 + %0 = load float addrspace(1)*, float addrspace(1)** %ptr.addr, align 8 + store float 3.000000e+00, float addrspace(1)* %0, align 4 + ret void +} +; Function Attrs: nounwind +define spir_kernel void @host_kernel(i32 %size, float addrspace(1)* %ptr) #0 { +entry: + %size.addr = alloca i32, align 4 + %ptr.addr = alloca float addrspace(1)*, align 8 + %block_with_local = alloca %opencl.block*, align 8 + %block = alloca %opencl.block*, align 8 + %captured = alloca <{ float addrspace(1)* }>, align 8 + %wgSize = alloca i32, align 4 + %prefMul = alloca i32, align 4 + %agg.tmp = alloca %struct.ndrange_t, align 8 + %agg.tmp8 = alloca %struct.ndrange_t, align 8 + store i32 %size, i32* %size.addr, align 4 + store float addrspace(1)* %ptr, float addrspace(1)** %ptr.addr, align 8 + %0 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i32 0, i32 0, i8* null) + store %opencl.block* %0, %opencl.block** %block_with_local, align 8 + %block.captured = getelementptr inbounds <{ float addrspace(1)* }>, <{ float addrspace(1)* }>* %captured, i32 0, i32 0 + %1 = load float addrspace(1)*, float addrspace(1)** %ptr.addr, align 8 + store float addrspace(1)* %1, float addrspace(1)** %block.captured, align 8 + %2 = bitcast <{ float addrspace(1)* }>* %captured to i8* + %3 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @__host_kernel_block_invoke_2 to i8*), i32 8, i32 8, i8* %2) + store %opencl.block* %3, %opencl.block** %block, align 8 + %4 = load %opencl.block*, %opencl.block** %block_with_local, align 8 + %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block* %4) + store i32 %call, i32* %wgSize, align 4 + %5 = load %opencl.block*, %opencl.block** %block_with_local, align 8 + %call2 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block* %5) + store i32 %call2, i32* %prefMul, align 4 + %call3 = call spir_func %opencl.queue_t* @_Z17get_default_queuev() + call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp, i64 1) + %6 = load %opencl.block*, %opencl.block** %block_with_local, align 8 + %7 = load i32, i32* %size.addr, align 4 + %8 = load i32, i32* %wgSize, align 4 + %9 = load i32, i32* %prefMul, align 4 + %mul = mul i32 %8, %9 + %call4 = call spir_func i32 (%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t* %call3, i32 241, %struct.ndrange_t* byval %agg.tmp, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %6, i32 %7, i32 %mul) + %10 = load %opencl.block*, %opencl.block** %block, align 8 + %call5 = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block* %10) + store i32 %call5, i32* %wgSize, align 4 + %11 = load %opencl.block*, %opencl.block** %block, align 8 + %call6 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block* %11) + store i32 %call6, i32* %prefMul, align 4 + %call7 = call spir_func %opencl.queue_t* @_Z17get_default_queuev() + call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp8, i64 1) + %12 = load %opencl.block*, %opencl.block** %block, align 8 + %call9 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t* %call7, i32 241, %struct.ndrange_t* byval %agg.tmp8, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %12) + ret void +} + +; Function Attrs: nounwind +define internal spir_func void @__host_kernel_block_invoke(i8* %.block_descriptor, i8 addrspace(3)* %ptr0, i8 addrspace(3)* %ptr1) #0 { +entry: + %.block_descriptor.addr = alloca i8*, align 8 + %ptr0.addr = alloca i8 addrspace(3)*, align 8 + %ptr1.addr = alloca i8 addrspace(3)*, align 8 + %block.addr = alloca <{}>*, align 8 + store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8 + %0 = load i8*, i8** %.block_descriptor.addr + store i8 addrspace(3)* %ptr0, i8 addrspace(3)** %ptr0.addr, align 8 + store i8 addrspace(3)* %ptr1, i8 addrspace(3)** %ptr1.addr, align 8 + %block = bitcast i8* %.block_descriptor to <{}>* + store <{}>* %block, <{}>** %block.addr, align 8 + %1 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr0.addr, align 8 + %2 = bitcast i8 addrspace(3)* %1 to float addrspace(3)* + %3 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr1.addr, align 8 + %4 = bitcast i8 addrspace(3)* %3 to float addrspace(3)* + call spir_func void @device_kernel_with_local_args(float addrspace(3)* %2, float addrspace(3)* %4) + ret void +} + +declare %opencl.block* @spir_block_bind(i8*, i32, i32, i8*) + +; Function Attrs: nounwind +define internal spir_func void @__host_kernel_block_invoke_2(i8* %.block_descriptor) #0 { +entry: + %.block_descriptor.addr = alloca i8*, align 8 + %block.addr = alloca <{ float addrspace(1)* }>*, align 8 + store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8 + %0 = load i8*, i8** %.block_descriptor.addr + %block = bitcast i8* %.block_descriptor to <{ float addrspace(1)* }>* + store <{ float addrspace(1)* }>* %block, <{ float addrspace(1)* }>** %block.addr, align 8 + %block.capture.addr = getelementptr inbounds <{ float addrspace(1)* }>, <{ float addrspace(1)* }>* %block, i32 0, i32 0 + %1 = load float addrspace(1)*, float addrspace(1)** %block.capture.addr, align 8 + call spir_func void @device_kernel(float addrspace(1)* %1) + ret void +} + +declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block*) #1 + +declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block*) #1 + +declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) #1 + +declare spir_func %opencl.queue_t* @_Z17get_default_queuev() #1 + +declare spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret, i64) #1 + +declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*) #1 + +declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*) #1 + +declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!opencl.kernels = !{!0} +!opencl.enable.FP_CONTRACT = !{} +!opencl.spir.version = !{!6} +!opencl.ocl.version = !{!7} +!opencl.used.extensions = !{!8} +!opencl.used.optional.core.features = !{!8} +!opencl.compiler.options = !{!8} + +!0 = !{void (i32, float addrspace(1)*)* @host_kernel, !1, !2, !3, !4, !5} +!1 = !{!"kernel_arg_addr_space", i32 0, i32 1} +!2 = !{!"kernel_arg_access_qual", !"none", !"none"} +!3 = !{!"kernel_arg_type", !"uint", !"float*"} +!4 = !{!"kernel_arg_base_type", !"uint", !"float*"} +!5 = !{!"kernel_arg_type_qual", !"", !""} +!6 = !{i32 1, i32 2} +!7 = !{i32 2, i32 0} +!8 = !{} diff --git a/test/transcoding/spirv-types.ll b/test/transcoding/spirv-types.ll index 7539dd7..67e5bca 100644 --- a/test/transcoding/spirv-types.ll +++ b/test/transcoding/spirv-types.ll @@ -57,7 +57,7 @@ target triple = "spir-unknown-unknown" %spirv.Pipe._0 = type opaque ; read_only pipe %spirv.Pipe._1 = type opaque ; write_only pipe %spirv.Image._void_0_0_0_0_0_0_0 = type opaque ; read_only image1d_t -%spirv.Image._int_1_0_0_0_0_0_0 = type opaque ; read_only image2d_t +%spirv.Image._int_1_0_0_0_0_0_0 = type opaque ; read_only image2d_t %spirv.Image._uint_2_0_0_0_0_0_0 = type opaque ; read_only image3d_t %spirv.Image._float_1_1_0_0_0_0_0 = type opaque; read_only image2d_depth_t %spirv.Image._half_1_0_1_0_0_0_0 = type opaque ; read_only image2d_array_t @@ -82,17 +82,22 @@ target triple = "spir-unknown-unknown" ; CHECK-SPIRV: 3 FunctionParameter [[IMG1D_WR]] {{[0-9]+}} ; CHECK-SPIRV: 3 FunctionParameter [[IMG2D_RW]] {{[0-9]+}} -; CHECK-LLVM: define spir_kernel void @foo( -; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %a, -; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %b, -; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c1, -; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d1, -; CHECK-LLVM: %opencl.image3d_t addrspace(1)* nocapture %e1, -; CHECK-LLVM: %opencl.image2d_array_t addrspace(1)* nocapture %f1, -; CHECK-LLVM: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1, -; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c2, -; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d3) - +; CHECK-LLVM: define spir_kernel void @foo( +; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %a, +; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %b, +; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c1, +; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d1, +; CHECK-LLVM-SAME: %opencl.image3d_t addrspace(1)* nocapture %e1, +; CHECK-LLVM-SAME: %opencl.image2d_array_t addrspace(1)* nocapture %f1, +; CHECK-LLVM-SAME: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1, +; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c2, +; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d3) +; CHECK-LLVM-SAME: !kernel_arg_addr_space [[AS:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type_qual [[TQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]] + ; Function Attrs: nounwind readnone define spir_kernel void @foo( %spirv.Pipe._0 addrspace(1)* nocapture %a, @@ -161,11 +166,11 @@ attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-point !opencl.compiler.options = !{!8} !llvm.ident = !{!10} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_access_qual", !"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_base_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type_qual", !"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !""} +; CHECK-LLVM-DAG: [[AS]] = !{i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1} +; CHECK-LLVM-DAG: [[AQ]] = !{!"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write"} +; CHECK-LLVM-DAG: [[TYPE]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"} +; CHECK-LLVM-DAG: [[BT]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"} +; CHECK-LLVM-DAG: [[TQ]] = !{!"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !""} !0 = !{void (%spirv.Pipe._0 addrspace(1)*, %spirv.Pipe._1 addrspace(1)*, %spirv.Image._void_0_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._int_1_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._uint_2_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._half_1_0_1_0_0_0_0 addrspace(1)*, %spirv.Image._float_5_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._void_0_0_0_0_0_0_1 addrspace(1)*, %spirv.Image._void_1_0_0_0_0_0_2 addrspace(1)*)* @foo, !1, !2, !3, !4, !5} !1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1} diff --git a/test/vec_type_hint.ll b/test/vec_type_hint.ll index e3bc812..6afef29 100644 --- a/test/vec_type_hint.ll +++ b/test/vec_type_hint.ll @@ -1,15 +1,15 @@ ; kernel ; __attribute__((vec_type_hint(float4))) ; void test_float() {} -; +; ; kernel ; __attribute__((vec_type_hint(double))) ; void test_double() {} -; +; ; kernel ; __attribute__((vec_type_hint(uint4))) ; void test_uint() {} -; +; ; kernel ; __attribute__((vec_type_hint(int8))) ; void test_int() {} @@ -26,33 +26,41 @@ ; CHECK-SPIRV: {{[0-9]+}} EntryPoint {{[0-9]+}} {{[0-9]+}} "test_double" ; CHECK-SPIRV: {{[0-9]+}} EntryPoint {{[0-9]+}} {{[0-9]+}} "test_uint" ; CHECK-SPIRV: {{[0-9]+}} EntryPoint {{[0-9]+}} {{[0-9]+}} "test_int" -; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} ; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} -; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} -; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} +; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} +; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} +; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} ; ModuleID = 'vec_type_hint.cl' target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-unknown" +; CHECK-LLVM: define spir_kernel void @test_float() +; CHECK-LLVM-SAME: !vec_type_hint [[VFLOAT:![0-9]+]] ; Function Attrs: nounwind define spir_kernel void @test_float() #0 { entry: ret void } +; CHECK-LLVM: define spir_kernel void @test_double() +; CHECK-LLVM-SAME: !vec_type_hint [[VDOUBLE:![0-9]+]] ; Function Attrs: nounwind define spir_kernel void @test_double() #0 { entry: ret void } +; CHECK-LLVM: define spir_kernel void @test_uint() +; CHECK-LLVM-SAME: !vec_type_hint [[VUINT:![0-9]+]] ; Function Attrs: nounwind define spir_kernel void @test_uint() #0 { entry: ret void } +; CHECK-LLVM: define spir_kernel void @test_int() +; CHECK-LLVM-SAME: !vec_type_hint [[VINT:![0-9]+]] ; Function Attrs: nounwind define spir_kernel void @test_int() #0 { entry: @@ -70,14 +78,10 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"= !opencl.compiler.options = !{!15} !llvm.ident = !{!16} -; CHECK-LLVM: @test_float -; CHECK-LLVM: {!"vec_type_hint", <4 x float> undef, i32 1} -; CHECK-LLVM: @test_double -; CHECK-LLVM: {!"vec_type_hint", double undef, i32 1} -; CHECK-LLVM: @test_uint -; CHECK-LLVM: {!"vec_type_hint", <4 x i32> undef, i32 1} -; CHECK-LLVM: @test_int -; CHECK-LLVM: {!"vec_type_hint", <8 x i32> undef, i32 1} +; CHECK-LLVM: [[VFLOAT]] = !{<4 x float> undef, i32 1} +; CHECK-LLVM: [[VDOUBLE]] = !{double undef, i32 1} +; CHECK-LLVM: [[VUINT]] = !{<4 x i32> undef, i32 1} +; CHECK-LLVM: [[VINT]] = !{<8 x i32> undef, i32 1} !0 = !{void ()* @test_float, !1, !2, !3, !4, !5, !6} !1 = !{!"kernel_arg_addr_space"} |