summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Sotkin <alexey.sotkin@intel.com>2017-07-10 17:39:01 +0300
committerAlexey Sotkin <alexey.sotkin@intel.com>2018-04-03 17:17:38 +0300
commit5d0e2816c2649937ad731f7b72d04311e7fa2e65 (patch)
tree4d1af522f407dde941a01346bdc04db7c2204c43
parent223c78694394a61819ebc3e9b834dd8a38ec8678 (diff)
Upgrade SPIRV translator from LLVM 3.8 to LLVM 4.0
Change-Id: I28e88c5b38463e14a4af4fc0e49e8b95c8debef0
-rw-r--r--lib/SPIRV/SPIRVReader.cpp429
-rw-r--r--test/ExecutionMode_SPIR_to_SPIRV.ll6
-rw-r--r--test/image_without_access_qualifier.spt94
-rw-r--r--test/transcoding/check_ro_qualifier.ll13
-rw-r--r--test/transcoding/check_wo_qualifier.ll13
-rw-r--r--test/transcoding/cl-types.ll37
-rw-r--r--test/transcoding/device_execution.ll5
-rw-r--r--test/transcoding/device_execution_multiple_blocks.ll28
-rw-r--r--test/transcoding/device_execution_overloading.ll12
-rw-r--r--test/transcoding/device_execution_simple_local_memory.ll12
-rw-r--r--test/transcoding/device_execution_vaargs.ll231
-rw-r--r--test/transcoding/spirv-types.ll39
-rw-r--r--test/vec_type_hint.ll32
13 files changed, 709 insertions, 242 deletions
diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp
index efc8952..808ea58 100644
--- a/lib/SPIRV/SPIRVReader.cpp
+++ b/lib/SPIRV/SPIRVReader.cpp
@@ -124,12 +124,12 @@ dumpLLVM(Module *M, const std::string &FName) {
}
static MDNode*
-getMDNodeStringIntVec(LLVMContext *Context, const std::string& Str,
- const std::vector<SPIRVWord>& IntVals) {
+getMDNodeStringIntVec(LLVMContext *Context,
+ const std::vector<SPIRVWord>& IntVals) {
std::vector<Metadata*> ValueVec;
- ValueVec.push_back(MDString::get(*Context, Str));
for (auto &I:IntVals)
- ValueVec.push_back(ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(*Context), I)));
+ ValueVec.push_back(ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(*Context), I)));
return MDNode::get(*Context, ValueVec);
}
@@ -168,15 +168,14 @@ addNamedMetadataStringSet(LLVMContext *Context, Module *M,
}
static void
-addOCLKernelArgumentMetadata(LLVMContext *Context,
- std::vector<llvm::Metadata*> &KernelMD, const std::string &MDName,
- SPIRVFunction *BF, std::function<Metadata *(SPIRVFunctionParameter *)>Func){
+addOCLKernelArgumentMetadata(LLVMContext *Context, const std::string &MDName,
+ SPIRVFunction *BF, llvm::Function *Fn,
+ std::function<Metadata *(SPIRVFunctionParameter *)>Func) {
std::vector<Metadata*> ValueVec;
- ValueVec.push_back(MDString::get(*Context, MDName));
BF->foreachArgument([&](SPIRVFunctionParameter *Arg) {
ValueVec.push_back(Func(Arg));
});
- KernelMD.push_back(MDNode::get(*Context, ValueVec));
+ Fn->setMetadata(MDName, MDNode::get(*Context, ValueVec));
}
class SPIRVToLLVMDbgTran {
@@ -297,12 +296,17 @@ public:
bool CreatePlaceHolder = true);
Value *transValueWithoutDecoration(SPIRVValue *, Function *F, BasicBlock *,
bool CreatePlaceHolder = true);
+ Value *transDeviceEvent(SPIRVValue *BV, Function *F, BasicBlock *BB);
+ Value *transEnqueuedBlock(SPIRVValue *BF, SPIRVValue *BC, SPIRVValue *BCSize,
+ SPIRVValue *BCAligment, Function *F, BasicBlock *BB);
bool transDecoration(SPIRVValue *, Value *);
bool transAlign(SPIRVValue *, Value *);
Instruction *transOCLBuiltinFromExtInst(SPIRVExtInst *BC, BasicBlock *BB);
std::vector<Value *> transValue(const std::vector<SPIRVValue *>&, Function *F,
BasicBlock *);
Function *transFunction(SPIRVFunction *F);
+ Instruction *transEnqueueKernelBI(SPIRVInstruction *BI, BasicBlock *BB);
+ Instruction *transWGSizeBI(SPIRVInstruction *BI, BasicBlock *BB);
bool transFPContractMetadata();
bool transKernelMetadata();
bool transNonTemporalMetadata(Instruction *I);
@@ -328,13 +332,6 @@ public:
/// need to be post-processed to return the struct through sret argument.
bool postProcessOCLBuiltinReturnStruct(Function *F);
- /// \brief Post-process OpenCL builtin functions having block argument.
- ///
- /// These functions are translated to functions with function pointer type
- /// argument first, then post-processed to have block argument.
- bool postProcessOCLBuiltinWithFuncPointer(Function *F,
- Function::arg_iterator I);
-
/// \brief Post-process OpenCL builtin functions having array argument.
///
/// These functions are translated to functions with array type argument
@@ -387,6 +384,7 @@ public:
typedef DenseMap<SPIRVType *, Type *> SPIRVToLLVMTypeMap;
typedef DenseMap<SPIRVValue *, Value *> SPIRVToLLVMValueMap;
+ typedef DenseMap<SPIRVValue *, Value *> SPIRVBlockToLLVMStructMap;
typedef DenseMap<SPIRVFunction *, Function *> SPIRVToLLVMFunctionMap;
typedef DenseMap<GlobalVariable *, SPIRVBuiltinVariableKind> BuiltinVarMap;
@@ -402,6 +400,7 @@ private:
SPIRVToLLVMTypeMap TypeMap;
SPIRVToLLVMValueMap ValueMap;
SPIRVToLLVMFunctionMap FuncMap;
+ SPIRVBlockToLLVMStructMap BlockMap;
SPIRVToLLVMPlaceholderMap PlaceholderMap;
SPIRVToLLVMDbgTran DbgTran;
@@ -781,6 +780,7 @@ SPIRVToLLVM::transType(SPIRVType *T, bool IsClassMember) {
return 0;
}
+
std::string
SPIRVToLLVM::transTypeToOCLTypeName(SPIRVType *T, bool IsSigned) {
switch(T->getOpCode()) {
@@ -906,30 +906,30 @@ void
SPIRVToLLVM::setLLVMLoopMetadata(SPIRVLoopMerge* LM, BranchInst* BI) {
if (!LM)
return;
- llvm::MDString *Name = nullptr;
- auto Temp = MDNode::getTemporary(*Context, None);
- auto Self = MDNode::get(*Context, Temp);
- Self->replaceOperandWith(0, Self);
- MDNode::deleteTemporary(Temp);
-
- if (LM->getLoopControl() == LoopControlMaskNone) {
- BI->setMetadata("llvm.loop", Self);
- return;
- }
- else if(LM->getLoopControl() == LoopControlUnrollMask)
- Name = llvm::MDString::get(*Context, "llvm.loop.unroll.full");
- else if (LM->getLoopControl() == LoopControlDontUnrollMask)
- Name = llvm::MDString::get(*Context, "llvm.loop.unroll.disable");
- else
- return;
-
- std::vector<llvm::Metadata *> OpValues(1, Name);
- SmallVector<llvm::Metadata *, 2> Metadata;
- Metadata.push_back(llvm::MDNode::get(*Context, Self));
- Metadata.push_back(llvm::MDNode::get(*Context, OpValues));
-
- llvm::MDNode *Node = llvm::MDNode::get(*Context, Metadata);
- Node->replaceOperandWith(0, Node);
+ llvm::MDString *Name = nullptr;
+ auto Temp = MDNode::getTemporary(*Context, None);
+ auto Self = MDNode::get(*Context, Temp);
+ Self->replaceOperandWith(0, Self);
+ MDNode::deleteTemporary(Temp);
+
+ if (LM->getLoopControl() == LoopControlMaskNone) {
+ BI->setMetadata("llvm.loop", Self);
+ return;
+ }
+ else if(LM->getLoopControl() == LoopControlUnrollMask)
+ Name = llvm::MDString::get(*Context, "llvm.loop.unroll.full");
+ else if (LM->getLoopControl() == LoopControlDontUnrollMask)
+ Name = llvm::MDString::get(*Context, "llvm.loop.unroll.disable");
+ else
+ return;
+
+ std::vector<llvm::Metadata *> OpValues(1, Name);
+ SmallVector<llvm::Metadata *, 2> Metadata;
+ Metadata.push_back(llvm::MDNode::get(*Context, Self));
+ Metadata.push_back(llvm::MDNode::get(*Context, OpValues));
+
+ llvm::MDNode *Node = llvm::MDNode::get(*Context, Metadata);
+ Node->replaceOperandWith(0, Node);
BI->setMetadata("llvm.loop", Node);
}
@@ -969,6 +969,19 @@ SPIRVToLLVM::transValue(SPIRVValue *BV, Function *F, BasicBlock *BB,
}
Value *
+SPIRVToLLVM::transDeviceEvent(SPIRVValue *BV, Function *F, BasicBlock *BB) {
+ auto Val = transValue(BV, F, BB, false);
+ auto Ty = dyn_cast<PointerType>(Val->getType());
+ assert(Ty && "Invalid Device Event");
+ if (Ty->getAddressSpace() == SPIRAS_Generic)
+ return Val;
+
+ IRBuilder<> Builder(BB);
+ auto EventTy = PointerType::get(Ty->getElementType(), SPIRAS_Generic);
+ return Builder.CreateAddrSpaceCast(Val, EventTy);
+}
+
+Value *
SPIRVToLLVM::transConvertInst(SPIRVValue* BV, Function* F, BasicBlock* BB) {
SPIRVUnary* BC = static_cast<SPIRVUnary*>(BV);
auto Src = transValue(BC->getOperand(0), F, BB, BB ? true : false);
@@ -1060,16 +1073,6 @@ SPIRVToLLVM::postProcessOCL() {
for (auto I = M->begin(), E = M->end(); I != E;) {
auto F = I++;
if (F->hasName() && F->isDeclaration()) {
- DEBUG(dbgs() << "[postProcessOCL func ptr] " << *F << '\n');
- auto AI = F->arg_begin();
- if (hasFunctionPointerArg(&(*F), AI) && isDecoratedSPIRVFunc(&(*F)))
- if (!postProcessOCLBuiltinWithFuncPointer(&(*F), AI))
- return false;
- }
- }
- for (auto I = M->begin(), E = M->end(); I != E;) {
- auto F = I++;
- if (F->hasName() && F->isDeclaration()) {
DEBUG(dbgs() << "[postProcessOCL array arg] " << *F << '\n');
if (hasArrayArg(&(*F)) && oclIsBuiltin(F->getName(), &DemangledName, isCPP))
if (!postProcessOCLBuiltinWithArrayArguments(&(*F), DemangledName))
@@ -1107,40 +1110,6 @@ SPIRVToLLVM::postProcessOCLBuiltinReturnStruct(Function *F) {
}
bool
-SPIRVToLLVM::postProcessOCLBuiltinWithFuncPointer(Function* F,
- Function::arg_iterator I) {
- auto Name = undecorateSPIRVFunction(F->getName());
- std::set<Value *> InvokeFuncPtrs;
- mutateFunctionOCL (F, [=, &InvokeFuncPtrs](
- CallInst *CI, std::vector<Value *> &Args) {
- auto ALoc = std::find_if(Args.begin(), Args.end(), [](Value * elem) {
- return isFunctionPointerType(elem->getType());
- });
- assert(ALoc != Args.end() && "Buit-in must accept a pointer to function");
- assert(isa<Function>(*ALoc) && "Invalid function pointer usage");
- Value *Ctx = ALoc[1];
- Value *CtxLen = ALoc[2];
- Value *CtxAlign = ALoc[3];
- if (Name == kOCLBuiltinName::EnqueueKernel)
- assert(Args.end() - ALoc > 3);
- else
- assert(Args.end() - ALoc > 0);
- // Erase arguments what are hanled by "spir_block_bind" according to SPIR 2.0
- Args.erase(ALoc + 1, ALoc + 4);
-
- InvokeFuncPtrs.insert(*ALoc);
- // There will be as many calls to spir_block_bind as how much device execution
- // bult-ins using this block. This doesn't contradict SPIR 2.0 specification.
- *ALoc = addBlockBind(M, cast<Function>(removeCast(*ALoc)),
- Ctx, CtxLen, CtxAlign, CI);
- return Name;
- });
- for (auto &I:InvokeFuncPtrs)
- eraseIfNoUse(I);
- return true;
-}
-
-bool
SPIRVToLLVM::postProcessOCLBuiltinWithArrayArguments(Function* F,
const std::string &DemangledName) {
DEBUG(dbgs() << "[postProcessOCLBuiltinWithArrayArguments] " << *F << '\n');
@@ -1482,8 +1451,8 @@ SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F,
else if (LinkageTy == GlobalValue::CommonLinkage)
// In LLVM variables with common linkage type must be initilized by 0
Initializer = Constant::getNullValue(Ty);
- else if (BVar->getStorageClass() == SPIRVStorageClassKind::StorageClassWorkgroup)
- Initializer = dyn_cast<Constant>(UndefValue::get(Ty));
+ else if (BVar->getStorageClass() == SPIRVStorageClassKind::StorageClassWorkgroup)
+ Initializer = dyn_cast<Constant>(UndefValue::get(Ty));
SPIRVStorageClassKind BS = BVar->getStorageClass();
if (BS == StorageClassFunction && !Init) {
@@ -1955,7 +1924,13 @@ SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F,
case OpSignBitSet :
return mapValue(BV,
transOCLRelational(static_cast<SPIRVInstruction *>(BV), BB));
-
+ case OpEnqueueKernel :
+ return mapValue(BV, transEnqueueKernelBI(
+ static_cast<SPIRVInstruction *>(BV), BB));
+ case OpGetKernelWorkGroupSize :
+ case OpGetKernelPreferredWorkGroupSizeMultiple :
+ return mapValue(BV, transWGSizeBI(
+ static_cast<SPIRVInstruction *>(BV), BB));
default: {
auto OC = BV->getOpCode();
if (isSPIRVCmpInstTransToLLVMInst(static_cast<SPIRVInstruction*>(BV))) {
@@ -2129,6 +2104,249 @@ SPIRVToLLVM::transOCLBuiltinPostproc(SPIRVInstruction* BI,
return CI;
}
+static void adaptBlockInvoke(Function *Invoke, Type *BlockStructTy) {
+ // As first argument block invoke takes a pointer to captured data.
+ // We pass to block invoke whole block structure, not only captured data
+ // as it expected. So we need to update original function to unpack expected
+ // captured data and use it instead of an original argument
+ //
+ // %block = bitcast i8 addrspace(4)* to <{ ..., [X x i8] }> addrspace(4)*
+ // %block.1 = addrspacecast %block to <{ ..., [X x i8] }>*
+ // %captured = getelementptr <{ ..., [X x i8] }>, i32 0, i32 5
+ // %captured.1 = bitcast %captured to i8*
+
+ BasicBlock *BB = &(Invoke->getEntryBlock());
+ BB->splitBasicBlock(BB->begin(), "invoke");
+ auto FirstArg = &*(Invoke->arg_begin());
+ IRBuilder<> Builder(BB, BB->begin());
+
+ auto FirstArgTy = dyn_cast<PointerType>(FirstArg->getType());
+ assert(FirstArgTy && "Expects that first argument of invoke is a pointer");
+ unsigned FirstArgAS = FirstArgTy->getAddressSpace();
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Invoke->getParent()->getContext(), FirstArgAS);
+ auto BlockStructPtrTy = PointerType::get(BlockStructTy, FirstArgAS);
+
+ auto Int32Ty = Type::getInt32Ty(Invoke->getParent()->getContext());
+ Value *CapturedGEPIndices[2] = { ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, 5) };
+ auto BlockToStructCast =
+ Builder.CreateBitCast(FirstArg, BlockStructPtrTy, "block");
+ auto CapturedGEP = Builder.CreateGEP(BlockToStructCast, CapturedGEPIndices);
+ auto CapturedToInt8Cast = Builder.CreateBitCast(CapturedGEP, Int8PtrTy);
+
+ FirstArg->replaceUsesOutsideBlock(CapturedToInt8Cast, BB);
+}
+
+static Type *getOrCreateBlockDescTy(Module *M) {
+ // Get or create block descriptor type which contains block size
+ // in the last element: %struct.__block_descriptor = type { i64, i64 }
+ auto BlockDescTy = M->getTypeByName("struct.__block_descriptor");
+ if (BlockDescTy)
+ return BlockDescTy;
+
+ auto Int64Ty = Type::getInt64Ty(M->getContext());
+ Type *BlockDescElements[2] = {/*Reserved*/ Int64Ty, /*Block size*/ Int64Ty };
+ return StructType::create(M->getContext(), BlockDescElements,
+ "struct.__block_descriptor");
+}
+
+Value *
+SPIRVToLLVM::transEnqueuedBlock(SPIRVValue *SInvoke, SPIRVValue *SCaptured,
+ SPIRVValue *SCaptSize, SPIRVValue *SCaptAlignment,
+ Function *LBI, BasicBlock *LBB) {
+ // Search if that block have been already translated
+ auto Loc = BlockMap.find(SInvoke);
+ if (Loc != BlockMap.end())
+ return Loc->second;
+
+ IRBuilder<> Builder(LBB);
+ const DataLayout &DL = M->getDataLayout();
+
+ // Translate block and its arguments from SPIRV values to LLVM
+ auto LInvoke = transFunction(static_cast<SPIRVFunction *>(SInvoke));
+ auto LCaptured = transValue(SCaptured, LBI, LBB, false);
+ auto LCaptSize =
+ dyn_cast<ConstantInt>(transValue(SCaptSize, LBI, LBB, false));
+ auto LCaptAlignment =
+ dyn_cast<ConstantInt>(transValue(SCaptAlignment, LBI, LBB, false));
+
+ // Create basic types
+ auto Int8Ty = Type::getInt8Ty(*Context);
+ auto Int32Ty = Type::getInt32Ty(*Context);
+ auto Int8PtrTy = Type::getInt8PtrTy(*Context, SPIRAS_Private);
+ auto Int8PtrTyGen = Type::getInt8PtrTy(*Context, SPIRAS_Generic);
+ auto BlockDescTy = getOrCreateBlockDescTy(M);
+ auto BlockDescPtrTy = BlockDescTy->getPointerTo(SPIRAS_Private);
+
+ // Create a block as structure:
+ // <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>
+ SmallVector<Type *, 8> BlockEls =
+ { /*isa*/ Int8PtrTy, /*flags*/ Int32Ty, /*reserved*/ Int32Ty,
+ /*invoke*/ Int8PtrTy, /*block_descriptor*/ BlockDescPtrTy };
+
+ // Add captured if any
+ // <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [X x i8] }>
+ // Note: captured data stored in structure as array of char
+ if (LCaptSize->getZExtValue() > 0)
+ BlockEls.push_back(ArrayType::get(Int8Ty, LCaptSize->getZExtValue()));
+
+ auto BlockTy = StructType::get(*Context, BlockEls, /*isPacked*/ true);
+
+ // Allocate block on the stack, then store data to it
+ auto BlockAlloca = Builder.CreateAlloca(BlockTy, nullptr, "block");
+ BlockAlloca->setAlignment(DL.getPrefTypeAlignment(BlockTy));
+
+ auto getIndices = [Int32Ty](int a, int b) -> SmallVector<Value *, 2> {
+ return { ConstantInt::get(Int32Ty, a), ConstantInt::get(Int32Ty, b) };
+ };
+
+ // 1. isa, flags and reserved fields isn't used in current implementation
+ // Fill them the same way as clang does
+ auto IsaGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 0));
+ Builder.CreateStore(ConstantPointerNull::get(Int8PtrTy), IsaGEP);
+ auto FlagsGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 1));
+ Builder.CreateStore(ConstantInt::get(Int32Ty, 1342177280), FlagsGEP);
+ auto ReservedGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 2));
+ Builder.CreateStore(ConstantInt::get(Int32Ty, 0), ReservedGEP);
+
+ // 2. Store pointer to block invoke to the structure
+ auto InvokeCast = Builder.CreateBitCast(LInvoke, Int8PtrTy, "invoke");
+ auto InvokeGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 3));
+ Builder.CreateStore(InvokeCast, InvokeGEP);
+
+ // 3. Create and store a pointer to the block descriptor global value
+ uint64_t SizeOfBlock = DL.getTypeAllocSize(BlockTy);
+
+ auto Int64Ty = Type::getInt64Ty(*Context);
+ Constant *BlockDescEls[2] = { ConstantInt::get(Int64Ty, 0),
+ ConstantInt::get(Int64Ty, SizeOfBlock) };
+ auto BlockDesc =
+ ConstantStruct::get(dyn_cast<StructType>(BlockDescTy), BlockDescEls);
+
+ auto BlockDescGV =
+ new GlobalVariable(*M, BlockDescTy, true, GlobalValue::InternalLinkage,
+ BlockDesc, "__block_descriptor_spirv");
+ auto BlockDescGEP =
+ Builder.CreateGEP(BlockAlloca, getIndices(0, 4), "block.descriptor");
+ Builder.CreateStore(BlockDescGV, BlockDescGEP);
+
+ // 4. Copy captured data to the structure
+ if (LCaptSize->getZExtValue() > 0) {
+ auto CapturedGEP =
+ Builder.CreateGEP(BlockAlloca, getIndices(0, 5), "block.captured");
+ auto CapturedGEPCast = Builder.CreateBitCast(CapturedGEP, Int8PtrTy);
+
+ // We can't make any guesses about type of captured data, so
+ // let's copy it through memcpy
+ std::string MemCpyName = "llvm.memcpy.p0i8.p0i8";
+ MemCpyName += (LCaptSize->getType()->getBitWidth() == 32) ? ".i32" : ".i64";
+ SmallVector<Type*, 8> MemCpyArgTys = {
+ Int8PtrTy, Int8PtrTy, // src and dst pointers
+ LCaptSize->getType(), // size
+ Int32Ty, // alignment
+ Type::getInt1Ty(*Context) // isVolatile
+ };
+
+ FunctionType *MemCpyTy =
+ FunctionType::get(Type::getVoidTy(*Context), MemCpyArgTys, false);
+ Function *MemCpy =
+ dyn_cast<Function>(M->getOrInsertFunction(MemCpyName, MemCpyTy));
+ assert(MemCpy && "Can't create memcpy intrinsic");
+ MemCpy->setLinkage(GlobalValue::ExternalLinkage);
+ SmallVector<Value*, 8> MemCpyArgs =
+ { CapturedGEPCast, LCaptured, LCaptSize, LCaptAlignment,
+ ConstantInt::get(Type::getInt1Ty(*Context), SCaptured->isVolatile()) };
+ Builder.CreateCall(MemCpy, MemCpyArgs);
+
+ // Fix invoke function to correctly process its first argument
+ adaptBlockInvoke(LInvoke, BlockTy);
+ }
+ auto BlockCast = Builder.CreateBitCast(BlockAlloca, Int8PtrTy);
+ auto BlockCastGen = Builder.CreateAddrSpaceCast(BlockCast, Int8PtrTyGen);
+ BlockMap[SInvoke] = BlockCastGen;
+ return BlockCastGen;
+}
+
+Instruction *
+SPIRVToLLVM::transEnqueueKernelBI(SPIRVInstruction *BI, BasicBlock *BB) {
+ Type *IntTy = Type::getInt32Ty(*Context);
+
+ // Find or create enqueue kernel BI declaration
+ auto Ops = BI->getOperands();
+ bool hasVaargs = Ops.size() > 10;
+
+ std::string FName = hasVaargs ? "__enqueue_kernel_events_vaargs"
+ : "__enqueue_kernel_basic_events";
+ Function* F = M->getFunction(FName);
+ if (!F) {
+ Type *EventTy = PointerType::get(
+ getOrCreateOpaquePtrType(M, SPIR_TYPE_NAME_CLK_EVENT_T, SPIRAS_Private),
+ SPIRAS_Generic);
+
+ SmallVector<Type *, 8> Tys = {
+ transType(Ops[0]->getType()), // queue
+ IntTy, // flags
+ transType(Ops[2]->getType()), // ndrange
+ IntTy, EventTy, EventTy, // events
+ Type::getInt8PtrTy(*Context, SPIRAS_Generic) // block
+ };
+ if (hasVaargs)
+ Tys.push_back(IntTy); // Number of variadics if any
+
+ FunctionType* FT = FunctionType::get(IntTy, Tys, hasVaargs);
+ F = Function::Create(FT, GlobalValue::ExternalLinkage, FName, M);
+ if (isFuncNoUnwind())
+ F->addFnAttr(Attribute::NoUnwind);
+ }
+
+ // Create call to enqueue kernel BI
+ SmallVector<Value *, 8> Args = {
+ transValue(Ops[0], F, BB, false), // queue
+ transValue(Ops[1], F, BB, false), // flags
+ transValue(Ops[2], F, BB, false), // ndrange
+ transValue(Ops[3], F, BB, false), // events number
+ transDeviceEvent(Ops[4], F, BB), // event_wait_list
+ transDeviceEvent(Ops[5], F, BB), // event_ret
+ transEnqueuedBlock(Ops[6], Ops[7], Ops[8], Ops[9], F, BB) // block
+ };
+
+ if (hasVaargs) {
+ Args.push_back(ConstantInt::get(IntTy, Ops.size() - 10)); // Number of vaargs
+ for (unsigned i = 10; i < Ops.size(); ++i)
+ Args.push_back(transValue(Ops[i], F, BB, false));
+ }
+ auto Call = CallInst::Create(F, Args, "", BB);
+ setName(Call, BI);
+ setAttrByCalledFunc(Call);
+ return Call;
+}
+
+Instruction *
+SPIRVToLLVM::transWGSizeBI(SPIRVInstruction *BI, BasicBlock *BB) {
+ std::string FName =
+ (BI->getOpCode() == OpGetKernelWorkGroupSize)
+ ? "__get_kernel_work_group_size_impl"
+ : "__get_kernel_preferred_work_group_multiple_impl";
+
+ Function* F = M->getFunction(FName);
+ if (!F) {
+ auto Int8PtrTyGen = Type::getInt8PtrTy(*Context, SPIRAS_Generic);
+ FunctionType* FT =
+ FunctionType::get(Type::getInt32Ty(*Context), Int8PtrTyGen, false);
+ F = Function::Create(FT, GlobalValue::ExternalLinkage, FName, M);
+ if (isFuncNoUnwind())
+ F->addFnAttr(Attribute::NoUnwind);
+ }
+ auto Ops = BI->getOperands();
+ auto Block = transEnqueuedBlock(Ops[0], Ops[1], Ops[2], Ops[3], F, BB);
+ auto Call = CallInst::Create(F, Block, "", BB);
+ setName(Call, BI);
+ setAttrByCalledFunc(Call);
+ return Call;
+}
+
Instruction *
SPIRVToLLVM::transBuiltinFromInst(const std::string& FuncName,
SPIRVInstruction* BI, BasicBlock* BB) {
@@ -2378,19 +2596,15 @@ SPIRVToLLVM::transNonTemporalMetadata(Instruction *I) {
bool
SPIRVToLLVM::transKernelMetadata() {
- NamedMDNode *KernelMDs = M->getOrInsertNamedMetadata(SPIR_MD_KERNELS);
for (unsigned I = 0, E = BM->getNumFunctions(); I != E; ++I) {
SPIRVFunction *BF = BM->getFunction(I);
Function *F = static_cast<Function *>(getTranslatedValue(BF));
assert(F && "Invalid translated function");
if (F->getCallingConv() != CallingConv::SPIR_KERNEL)
continue;
- std::vector<llvm::Metadata*> KernelMD;
- KernelMD.push_back(ValueAsMetadata::get(F));
// Generate metadata for kernel_arg_address_spaces
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_ADDR_SPACE, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_ADDR_SPACE, BF, F,
[=](SPIRVFunctionParameter *Arg){
SPIRVType *ArgTy = Arg->getType();
SPIRAddressSpace AS = SPIRAS_Private;
@@ -2402,9 +2616,8 @@ SPIRVToLLVM::transKernelMetadata() {
ConstantInt::get(Type::getInt32Ty(*Context), AS));
});
// Generate metadata for kernel_arg_access_qual
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_ACCESS_QUAL, BF,
- [=](SPIRVFunctionParameter *Arg){
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_ACCESS_QUAL, BF, F,
+ [=](SPIRVFunctionParameter *Arg){
std::string Qual;
auto T = Arg->getType();
if (T->isTypeOCLImage()) {
@@ -2418,14 +2631,12 @@ SPIRVToLLVM::transKernelMetadata() {
return MDString::get(*Context, Qual);
});
// Generate metadata for kernel_arg_type
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_TYPE, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_TYPE, BF, F,
[=](SPIRVFunctionParameter *Arg){
return transOCLKernelArgTypeName(Arg);
});
// Generate metadata for kernel_arg_type_qual
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_TYPE_QUAL, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_TYPE_QUAL, BF, F,
[=](SPIRVFunctionParameter *Arg){
std::string Qual;
if (Arg->hasDecorate(DecorationVolatile))
@@ -2451,8 +2662,7 @@ SPIRVToLLVM::transKernelMetadata() {
return MDString::get(*Context, Qual);
});
// Generate metadata for kernel_arg_base_type
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_BASE_TYPE, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_BASE_TYPE, BF, F,
[=](SPIRVFunctionParameter *Arg){
return transOCLKernelArgTypeName(Arg);
});
@@ -2463,37 +2673,32 @@ SPIRVToLLVM::transKernelMetadata() {
ArgHasName &= !Arg->getName().empty();
});
if (ArgHasName)
- addOCLKernelArgumentMetadata(Context, KernelMD,
- SPIR_MD_KERNEL_ARG_NAME, BF,
+ addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_NAME, BF, F,
[=](SPIRVFunctionParameter *Arg){
return MDString::get(*Context, Arg->getName());
});
}
// Generate metadata for reqd_work_group_size
if (auto EM = BF->getExecutionMode(ExecutionModeLocalSize)) {
- KernelMD.push_back(getMDNodeStringIntVec(Context,
- kSPIR2MD::WGSize, EM->getLiterals()));
+ F->setMetadata(kSPIR2MD::WGSize,
+ getMDNodeStringIntVec(Context, EM->getLiterals()));
}
// Generate metadata for work_group_size_hint
if (auto EM = BF->getExecutionMode(ExecutionModeLocalSizeHint)) {
- KernelMD.push_back(getMDNodeStringIntVec(Context,
- kSPIR2MD::WGSizeHint, EM->getLiterals()));
+ F->setMetadata(kSPIR2MD::WGSizeHint,
+ getMDNodeStringIntVec(Context, EM->getLiterals()));
}
// Generate metadata for vec_type_hint
if (auto EM = BF->getExecutionMode(ExecutionModeVecTypeHint)) {
std::vector<Metadata*> MetadataVec;
- MetadataVec.push_back(MDString::get(*Context, kSPIR2MD::VecTyHint));
Type *VecHintTy = decodeVecTypeHint(*Context, EM->getLiterals()[0]);
assert(VecHintTy);
MetadataVec.push_back(ValueAsMetadata::get(UndefValue::get(VecHintTy)));
MetadataVec.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(*Context),
1)));
- KernelMD.push_back(MDNode::get(*Context, MetadataVec));
+ F->setMetadata(kSPIR2MD::VecTyHint, MDNode::get(*Context, MetadataVec));
}
-
- llvm::MDNode *Node = MDNode::get(*Context, KernelMD);
- KernelMDs->addOperand(Node);
}
return true;
}
diff --git a/test/ExecutionMode_SPIR_to_SPIRV.ll b/test/ExecutionMode_SPIR_to_SPIRV.ll
index b8b2bb8..960b7f5 100644
--- a/test/ExecutionMode_SPIR_to_SPIRV.ll
+++ b/test/ExecutionMode_SPIR_to_SPIRV.ll
@@ -15,11 +15,9 @@
; LLVM => SPIRV => LLVM checks
; CHECK-LLVM: define spir_kernel void @worker()
-; CHECK-LLVM: !opencl.kernels = !{![[KERNEL_MD:[0-9]+]]}
-
-; CHECK-LLVM-DAG: ![[EXEC_MODE_MD:[0-9]+]] = !{!"work_group_size_hint", i32 128, i32 10, i32 1}
-; CHECK-LLVM-DAG: ![[KERNEL_MD]] = !{void ()* @worker, {{.*}}![[EXEC_MODE_MD]]{{.*}}}
+; CHECK-LLVM-SAME: !work_group_size_hint [[WG:![0-9]+]]
+; CHECK-LLVM: [[WG]] = !{i32 128, i32 10, i32 1}
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir-unknown-unknown"
diff --git a/test/image_without_access_qualifier.spt b/test/image_without_access_qualifier.spt
index fe88e76..98c9dbf 100644
--- a/test/image_without_access_qualifier.spt
+++ b/test/image_without_access_qualifier.spt
@@ -1,13 +1,13 @@
-119734787 65536 393230 34 0
-2 Capability Addresses
-2 Capability Linkage
-2 Capability Kernel
-2 Capability Int64
-2 Capability ImageBasic
+119734787 65536 393230 34 0
+2 Capability Addresses
+2 Capability Linkage
+2 Capability Kernel
+2 Capability Int64
+2 Capability ImageBasic
5 ExtInstImport 1 "OpenCL.std"
-3 MemoryModel 2 2
+3 MemoryModel 2 2
6 EntryPoint 6 13 "image_test"
-3 Source 3 200000
+3 Source 3 200000
11 Name 5 "__spirv_BuiltInGlobalInvocationId"
4 Name 14 "srcImg"
4 Name 15 "samp"
@@ -22,51 +22,51 @@
7 Name 29 "TempSampledImage"
5 Name 30 "call4.old"
5 Name 33 "arrayidx"
-4 Decorate 5 BuiltIn 28
-3 Decorate 5 Constant
-13 Decorate 5 LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
-4 TypeInt 2 64 0
-4 TypeInt 9 32 0
-4 TypeVector 3 2 3
-4 TypePointer 4 0 3
-2 TypeVoid 6
-9 TypeImage 7 6 1 0 0 0 0 0
-2 TypeSampler 8
-4 TypeVector 10 9 4
-4 TypePointer 11 5 10
-6 TypeFunction 12 6 7 8 11
-4 TypeVector 23 9 2
-3 TypeSampledImage 28 7
-3 TypeFloat 31 32
-4 Variable 4 5 0
-3 Undef 23 24
-4 Constant 31 32 0
+4 Decorate 5 BuiltIn 28
+3 Decorate 5 Constant
+13 Decorate 5 LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+4 TypeInt 2 64 0
+4 TypeInt 9 32 0
+4 TypeVector 3 2 3
+4 TypePointer 4 0 3
+2 TypeVoid 6
+9 TypeImage 7 6 1 0 0 0 0 0
+2 TypeSampler 8
+4 TypeVector 10 9 4
+4 TypePointer 11 5 10
+6 TypeFunction 12 6 7 8 11
+4 TypeVector 23 9 2
+3 TypeSampledImage 28 7
+3 TypeFloat 31 32
+4 Variable 4 5 0
+3 Undef 23 24
+4 Constant 31 32 0
-5 Function 6 13 0 12
-3 FunctionParameter 7 14
-3 FunctionParameter 8 15
-3 FunctionParameter 11 16
+5 Function 6 13 0 12
+3 FunctionParameter 7 14
+3 FunctionParameter 8 15
+3 FunctionParameter 11 16
-2 Label 17
-4 Load 3 18 5
-5 CompositeExtract 2 19 18 0
-4 Load 3 20 5
-5 CompositeExtract 2 21 20 1
-4 UConvert 9 22 19
-6 CompositeInsert 23 25 22 24 0
-4 UConvert 9 26 21
-6 CompositeInsert 23 27 26 25 1
-5 SampledImage 28 29 14 15
-7 ImageSampleExplicitLod 10 30 29 27 2 32
-5 InBoundsPtrAccessChain 11 33 16 19
-5 Store 33 30 2 16
-1 Return
+2 Label 17
+4 Load 3 18 5
+5 CompositeExtract 2 19 18 0
+4 Load 3 20 5
+5 CompositeExtract 2 21 20 1
+4 UConvert 9 22 19
+6 CompositeInsert 23 25 22 24 0
+4 UConvert 9 26 21
+6 CompositeInsert 23 27 26 25 1
+5 SampledImage 28 29 14 15
+7 ImageSampleExplicitLod 10 30 29 27 2 32
+5 InBoundsPtrAccessChain 11 33 16 19
+5 Store 33 30 2 16
+1 Return
-1 FunctionEnd
+1 FunctionEnd
; RUN: llvm-spirv %s -to-binary -o %t.spv
; RUN: llvm-spirv -r %t.spv -o %t.bc
; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM
; CHECK-LLVM: %opencl.image2d_t = type opaque
-; CHECK-LLVM: !{{[0-9]*}} = !{!"kernel_arg_access_qual", !"read_only", !"none", !"none"}
+; CHECK-LLVM: !{{[0-9]*}} = !{!"read_only", !"none", !"none"}
diff --git a/test/transcoding/check_ro_qualifier.ll b/test/transcoding/check_ro_qualifier.ll
index 8096e8c..1092f15 100644
--- a/test/transcoding/check_ro_qualifier.ll
+++ b/test/transcoding/check_ro_qualifier.ll
@@ -5,13 +5,18 @@
; CHECK-LLVM: opencl.image2d_array_ro_t = type opaque
; CHECK-LLVM: define spir_kernel void @sample_kernel(%opencl.image2d_array_ro_t addrspace(1)
+; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]]
+
; CHECK-LLVM: call spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1)
; CHECK-LLVM: call spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1)
; CHECK-LLVM: declare spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1)
; CHECK-LLVM: declare spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1)
-; CHECK-LLVM: !{{[0-9]+}} = !{void (%opencl.image2d_array_ro_t
-; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_type", !"image2d_array_ro_t"}
-; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_base_type", !"image2d_array_ro_t"}
+
+; CHECK-LLVM-DAG: [[AQ]] = !{!"read_only"}
+; CHECK-LLVM-DAG: [[TYPE]] = !{!"image2d_array_ro_t"}
+; CHECK-LLVM-DAG: [[BT]] = !{!"image2d_array_ro_t"}
; ModuleID = 'out.ll'
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
@@ -58,4 +63,4 @@ attributes #0 = { nounwind }
!7 = !{i32 1, i32 2}
!8 = !{}
!9 = !{!"cl_images"}
-!10 = !{i16 6, i16 14} \ No newline at end of file
+!10 = !{i16 6, i16 14}
diff --git a/test/transcoding/check_wo_qualifier.ll b/test/transcoding/check_wo_qualifier.ll
index 1f85d43..217070e 100644
--- a/test/transcoding/check_wo_qualifier.ll
+++ b/test/transcoding/check_wo_qualifier.ll
@@ -5,14 +5,17 @@
; CHECK-LLVM: opencl.image2d_array_wo_t = type opaque
; CHECK-LLVM: define spir_kernel void @sample_kernel(%opencl.image2d_array_wo_t addrspace(1)
+; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]]
+
; CHECK-LLVM: call spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)
; CHECK-LLVM: call spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)
; CHECK-LLVM: declare spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)
; CHECK-LLVM: declare spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)
-; CHECK-LLVM: !{{[0-9]+}} = !{void (%opencl.image2d_array_wo_t
-; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_type", !"image2d_array_wo_t"}
-; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_base_type", !"image2d_array_wo_t"}
-
+; CHECK-LLVM-DAG: [[AQ]] = !{!"write_only"}
+; CHECK-LLVM-DAG: [[TYPE]] = !{!"image2d_array_wo_t"}
+; CHECK-LLVM-DAG: [[BT]] = !{!"image2d_array_wo_t"}
; ModuleID = 'out.ll'
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
@@ -59,4 +62,4 @@ attributes #0 = { nounwind }
!7 = !{i32 1, i32 2}
!8 = !{}
!9 = !{!"cl_images"}
-!10 = !{i16 6, i16 14} \ No newline at end of file
+!10 = !{i16 6, i16 14}
diff --git a/test/transcoding/cl-types.ll b/test/transcoding/cl-types.ll
index 6c5a5bd..b466560 100644
--- a/test/transcoding/cl-types.ll
+++ b/test/transcoding/cl-types.ll
@@ -72,17 +72,22 @@ target triple = "spir-unknown-unknown"
; CHECK-SPIRV: 3 FunctionParameter [[IMG2D_RW]] {{[0-9]+}}
; CHECK-SPIRV: 3 FunctionParameter [[SAMP]] [[SAMP_ARG:[0-9]+]]
-; CHECK-LLVM: define spir_kernel void @foo(
-; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %a,
-; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %b,
-; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c1,
-; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d1,
-; CHECK-LLVM: %opencl.image3d_t addrspace(1)* nocapture %e1,
-; CHECK-LLVM: %opencl.image2d_array_t addrspace(1)* nocapture %f1,
-; CHECK-LLVM: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1,
-; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c2,
-; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d3,
-; CHECK-LLVM: i32 %s)
+; CHECK-LLVM: define spir_kernel void @foo(
+; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %a,
+; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %b,
+; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c1,
+; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d1,
+; CHECK-LLVM-SAME: %opencl.image3d_t addrspace(1)* nocapture %e1,
+; CHECK-LLVM-SAME: %opencl.image2d_array_t addrspace(1)* nocapture %f1,
+; CHECK-LLVM-SAME: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1,
+; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c2,
+; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d3,
+; CHECK-LLVM-SAME: i32 %s)
+; CHECK-LLVM-SAME: !kernel_arg_addr_space [[AS:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_type_qual [[TQ:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]]
; Function Attrs: nounwind readnone
define spir_kernel void @foo(
@@ -122,11 +127,11 @@ attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-point
!opencl.compiler.options = !{!8}
!llvm.ident = !{!10}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_access_qual", !"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write", !"none"}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_base_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type_qual", !"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !"", !""}
+; CHECK-LLVM-DAG: [[AS]] = !{i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0}
+; CHECK-LLVM-DAG: [[AQ]] = !{!"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write", !"none"}
+; CHECK-LLVM-DAG: [[TYPE]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"}
+; CHECK-LLVM-DAG: [[BT]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"}
+; CHECK-LLVM-DAG: [[TQ]] = !{!"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !"", !""}
!0 = !{void (%opencl.pipe_t addrspace(1)*, %opencl.pipe_t addrspace(1)*, %opencl.image1d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, %opencl.image2d_array_t addrspace(1)*, %opencl.image1d_buffer_t addrspace(1)*, %opencl.image1d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, i32)* @foo, !1, !2, !3, !4, !5}
!1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0}
diff --git a/test/transcoding/device_execution.ll b/test/transcoding/device_execution.ll
index f947db3..538a1b2 100644
--- a/test/transcoding/device_execution.ll
+++ b/test/transcoding/device_execution.ll
@@ -24,8 +24,9 @@
; RUN: llvm-spirv -r %t.spv -o %t.bc
; RUN: llvm-dis < %t.bc | FileCheck %s
-; CHECK-NOT: @_Z14enqueue_kernel{{.*}}{{S_|S0_|S1_}}
-; CHECK: @_Z14enqueue_kernel9ocl_queue{{22kernel_enqueue_flags_t|i}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE
+; CHECK-NOT: @_Z14enqueue_kernel9ocl_queue{{22kernel_enqueue_flags_t|i}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE
+; CHECK-NOT: @spir_block_bind
+; CHECK: call i32 @__enqueue_kernel_basic_events
declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*) #1
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
diff --git a/test/transcoding/device_execution_multiple_blocks.ll b/test/transcoding/device_execution_multiple_blocks.ll
index 69f3707..ac06106 100644
--- a/test/transcoding/device_execution_multiple_blocks.ll
+++ b/test/transcoding/device_execution_multiple_blocks.ll
@@ -68,30 +68,40 @@ entry:
; Function Attrs: nounwind
define spir_kernel void @enqueue_block_get_kernel_preferred_work_group_size_multiple(i32 addrspace(1)* %res) #0 {
+; CHECK: [[CTX:%.*]] = bitcast %0* %captured to i8*
+; CHECK: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, align 8
+; CHECK: store i8* {{.*}} @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke
+; CHECK: [[CAPTUREDGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK]], i32 0, i32 5
+; CHECK: [[CAPTUREDCAST:%.*]] = bitcast [8 x i8]* [[CAPTUREDGEP]] to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[CAPTUREDCAST]], i8* [[CTX]], i32 8, i32 8, i1 false)
+; CHECK: [[BLOCKBCST:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK]] to i8*
+; CHECK: [[BLOCKADDRCST:%.*]] = addrspacecast i8* [[BLOCKBCST]] to i8 addrspace(4)*
+; CHECK: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* [[BLOCKADDRCST]])
+; CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* [[BLOCKADDRCST]])
+; CHECK: call i32 @__enqueue_kernel_basic_events({{.*}} i8 addrspace(4)* [[BLOCKADDRCST]])
+
+; CHECK: [[BLOCK2:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, align 8
+; CHECK: store i8* {{.*}} @kernelBlockNoCtx_block_invoke
+; CHECK: [[BLOCKBCST2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK2]] to i8*
+; CHECK: [[BLOCKADDRCST2:%.*]] = addrspacecast i8* [[BLOCKBCST2]] to i8 addrspace(4)*
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: call i32 @__enqueue_kernel_basic_events({{.*}} i8 addrspace(4)* [[BLOCKADDRCST2]])
+
entry:
%captured = alloca <{ i32 addrspace(1)* }>, align 8
%ndrange = alloca %struct.ndrange_t, align 8
%block.captured = getelementptr inbounds <{ i32 addrspace(1)* }>, <{ i32 addrspace(1)* }>* %captured, i64 0, i32 0
store i32 addrspace(1)* %res, i32 addrspace(1)** %block.captured, align 8
%0 = bitcast <{ i32 addrspace(1)* }>* %captured to i8*
-; CHECK: [[CTX:.*]] = bitcast %0* %captured to i8*
%1 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke to i8*), i32 8, i32 8, i8* %0) #2
-; CHECK: [[BLOCK0:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]])
-; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*[[BLOCK0]])
%call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block* %1) #2
-; CHECK: [[BLOCK1:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]])
-; CHECK: call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*[[BLOCK1]])
%call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block* %1) #2
%div = udiv i32 %call, %call1
%call2 = call spir_func %opencl.queue_t* @get_default_queue() #2
%conv = zext i32 %div to i64
%conv3 = zext i32 %call to i64
call spir_func void @_Z10ndrange_1Dmm(%struct.ndrange_t* sret %ndrange, i64 %conv, i64 %conv3) #2
-; CHECK: [[BLOCK2:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]])
-; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK2]])
%call4 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %1) #2
-; CHECK: [[BLOCK3:.*]] = call {{.*}} @spir_block_bind({{.*}}@kernelBlockNoCtx_block_invoke{{.*}}, i32 0, i32 0, i8* null)
-; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK3]])
%2 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @kernelBlockNoCtx_block_invoke to i8*), i32 0, i32 0, i8* null) #2
%call5 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %2) #2
ret void
diff --git a/test/transcoding/device_execution_overloading.ll b/test/transcoding/device_execution_overloading.ll
index b719bca..e5ee309 100644
--- a/test/transcoding/device_execution_overloading.ll
+++ b/test/transcoding/device_execution_overloading.ll
@@ -70,13 +70,13 @@ entry:
ret void
}
-; CHECK: @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE
-; CHECK: @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE
-; CHECK: @_Z14enqueue_kernel9ocl_queue{{.*}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz
+; CHECK: @__get_kernel_work_group_size_impl
+; CHECK: @__get_kernel_preferred_work_group_multiple_impl
+; CHECK: @__enqueue_kernel_events_vaargs
-; CHECK: @_Z26get_kernel_work_group_sizeU13block_pointerFvvE
-; CHECK: @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE
-; CHECK: @_Z14enqueue_kernel9ocl_queue{{.*}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE
+; CHECK: @__get_kernel_work_group_size_impl
+; CHECK: @__get_kernel_preferred_work_group_multiple_impl
+; CHECK: @__enqueue_kernel_basic_events
; Function Attrs: nounwind
define spir_kernel void @host_kernel(i32 %size, float addrspace(1)* %ptr) #0 {
diff --git a/test/transcoding/device_execution_simple_local_memory.ll b/test/transcoding/device_execution_simple_local_memory.ll
index cc615e5..51601eb 100644
--- a/test/transcoding/device_execution_simple_local_memory.ll
+++ b/test/transcoding/device_execution_simple_local_memory.ll
@@ -18,8 +18,8 @@
;; bash$ export PATH_TO_INCLUDE= $PATH_TO_GEN/lib/clang/3.6.1/include
;; bash$ $PATH_TO_GEN/bin/clang -cc1 -x cl -cl-std=CL2.0 -triple spir64-unknonw-unknown -emit-llvm -include opencl-20.h repro.cl -o device_execution.ll
-;; 1. Check mangling of device execution built-ins for blocks with local memory arguments
-;; 2. Check there is an enqueue_kernel with ellipsis
+;; Check that device enqueue BIs wasn't mangled
+
; RUN: llvm-as %s -o %t.bc
; RUN: llvm-spirv %t.bc -o %t.spv
@@ -61,11 +61,11 @@ entry:
%0 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i32 0, i32 0, i8* null)
store %opencl.block* %0, %opencl.block** %block, align 8
%1 = load %opencl.block*, %opencl.block** %block, align 8
-; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE
+; CHECK: call {{.*}} @__get_kernel_work_group_size_impl
%call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block* %1)
store i32 %call, i32* %wgSize, align 4
%2 = load %opencl.block*, %opencl.block** %block, align 8
-; CHECK: call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE
+; CHECK: call {{.*}} @__get_kernel_preferred_work_group_multiple_impl
%call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block* %2)
store i32 %call1, i32* %prefMul, align 4
%call2 = call spir_func %opencl.queue_t* @_Z17get_default_queuev()
@@ -75,7 +75,7 @@ entry:
%5 = load i32, i32* %wgSize, align 4
%6 = load i32, i32* %prefMul, align 4
%mul = mul i32 %5, %6
-; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block* {{.*}}, i32 {{.*}}, i32 {{.*}})
+; CHECK: call {{.*}} @__enqueue_kernel_events_vaargs({{.*}}, i32 {{.*}}, i32 {{.*}})
%call3 = call spir_func i32 (%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %agg.tmp, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %3, i32 %4, i32 %mul)
ret void
}
@@ -108,7 +108,7 @@ declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE
declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block*) #1
-; CHECK: declare {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block*, i32, ...)
+; CHECK: declare {{.*}} @__enqueue_kernel_events_vaargs{{.*}}({{.*}}, i32, ...)
declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) #1
declare spir_func %opencl.queue_t* @_Z17get_default_queuev() #1
diff --git a/test/transcoding/device_execution_vaargs.ll b/test/transcoding/device_execution_vaargs.ll
new file mode 100644
index 0000000..5c307e6
--- /dev/null
+++ b/test/transcoding/device_execution_vaargs.ll
@@ -0,0 +1,231 @@
+;; bash$ cat device_execution_overloading.cl
+;; void device_kernel_with_local_args(__local float* ptr0, __local float* ptr1) {
+;; *ptr0 = 0;
+;; *ptr1 = 1;
+;; }
+;;
+;; void device_kernel(__global float* ptr) {
+;; *ptr = 3;
+;; }
+;;
+;; __kernel void host_kernel(uint size, __global float* ptr) {
+;; void(^block_with_local)(__local void*, __local void*) = ^(__local void* ptr0, __local void* ptr1){
+;; device_kernel_with_local_args(ptr0, ptr1);
+;; };
+;;
+;; void(^block)(void) = ^{
+;; device_kernel(ptr);
+;; };
+;;
+;n; uint wgSize = get_kernel_work_group_size(block_with_local);
+;; uint prefMul = get_kernel_preferred_work_group_size_multiple(block_with_local);
+;; enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1),
+;; 0, NULL, NULL, block_with_local, size, wgSize * prefMul);
+;;
+;; wgSize = get_kernel_work_group_size(block);
+;; prefMul = get_kernel_preferred_work_group_size_multiple(block);
+;; enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1),
+;; 0, NULL, NULL, block);
+;; }
+;; bash$
+;;$PATH_TO_GEN/bin/clang -cc1 -x cl -O0 -cl-std=CL2.0 -triple spir64-unknonw-unknown -include $PATH_TO_GEN/lib/clang/3.6.1/include/opencl-20.h -emit-llvm device_execution_overloading.cl -o device_execution_overloading.ll
+
+;; Test enqueue_kernel with and wthout vaargs
+
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: llvm-spirv -r %t.spv -o %t.bc
+; RUN: llvm-dis < %t.bc | FileCheck %s
+
+; CHECK: %struct.__block_descriptor = type { i64, i64 }
+
+; CHECK: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, align 8
+; CHECK: [[INVOKEGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], i32 0, i32 3
+; CHECK: store i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i8** [[INVOKEGEP]]
+; CHECK: [[BLOCKDESCGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], i32 0, i32 4
+; CHECK: store %struct.__block_descriptor* @__block_descriptor_spirv, %struct.__block_descriptor** [[BLOCKDESCGEP]]
+; CHECK: [[BLOCKBCAST:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]] to i8*
+; CHECK: [[BLOCKADDRCAST:%.*]] = addrspacecast i8* [[BLOCKBCAST]] to i8 addrspace(4)*
+; CHECK: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* [[BLOCKADDRCAST]])
+; CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* [[BLOCKADDRCAST]])
+; CHECK: call i32 {{.*}} @__enqueue_kernel_events_vaargs
+
+; CHECK: [[BLOCK2:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, align 8
+; CHECK: [[INVOKEGEP2:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]], i32 0, i32 3
+; CHECK: store i8* bitcast (void (i8*)* @__host_kernel_block_invoke_2 to i8*), i8** [[INVOKEGEP2]]
+; CHECK: [[BLOCKDESCGEP2:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]], i32 0, i32 4
+; CHECK: store %struct.__block_descriptor* @__block_descriptor_spirv.1, %struct.__block_descriptor** [[BLOCKDESCGEP2]]
+; CHECK: [[CAPTUREDGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]], i32 0, i32 5
+; CHECK: [[CAPTUREDCAST:%.*]] = bitcast [8 x i8]* [[CAPTUREDGEP]] to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[CAPTUREDCAST]], i8* %0, i32 8, i32 8, i1 false)
+; CHECK: [[BLOCKBCAST2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]] to i8*
+; CHECK: [[BLOCKADDRCAST2:%.*]] = addrspacecast i8* [[BLOCKBCAST2]] to i8 addrspace(4)*
+; CHECK: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* [[BLOCKADDRCAST2]])
+; CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* [[BLOCKADDRCAST2]]) #0
+; CHECK: call i32 @__enqueue_kernel_basic_events
+
+; CHECK: define internal spir_func void @__host_kernel_block_invoke(
+; CHECK-NOT: bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [0 x i8] }>*
+
+; CHECK: define internal spir_func void @__host_kernel_block_invoke_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INV_BLOCK:%.*]] = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>*
+; CHECK-NEXT: [[INV_CAPTUREDGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[INV_BLOCK]], i32 0, i32 5
+; CHECK-NEXT: bitcast [8 x i8]* [[INV_CAPTUREDGEP]] to i8*
+; CHECK-NEXT: br label %invoke
+
+; ModuleID = 'device_execution_overloading.cl'
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknonw-unknown"
+
+%opencl.block = type opaque
+%struct.ndrange_t = type { i32, [3 x i64], [3 x i64], [3 x i64] }
+%opencl.queue_t = type opaque
+%opencl.clk_event_t = type opaque
+
+; Function Attrs: nounwind
+define spir_func void @device_kernel_with_local_args(float addrspace(3)* %ptr0, float addrspace(3)* %ptr1) #0 {
+entry:
+ %ptr0.addr = alloca float addrspace(3)*, align 8
+ %ptr1.addr = alloca float addrspace(3)*, align 8
+ store float addrspace(3)* %ptr0, float addrspace(3)** %ptr0.addr, align 8
+ store float addrspace(3)* %ptr1, float addrspace(3)** %ptr1.addr, align 8
+ %0 = load float addrspace(3)*, float addrspace(3)** %ptr0.addr, align 8
+ store float 0.000000e+00, float addrspace(3)* %0, align 4
+ %1 = load float addrspace(3)*, float addrspace(3)** %ptr1.addr, align 8
+ store float 1.000000e+00, float addrspace(3)* %1, align 4
+ ret void
+}
+
+; Function Attrs: nounwind
+define spir_func void @device_kernel(float addrspace(1)* %ptr) #0 {
+entry:
+ %ptr.addr = alloca float addrspace(1)*, align 8
+ store float addrspace(1)* %ptr, float addrspace(1)** %ptr.addr, align 8
+ %0 = load float addrspace(1)*, float addrspace(1)** %ptr.addr, align 8
+ store float 3.000000e+00, float addrspace(1)* %0, align 4
+ ret void
+}
+; Function Attrs: nounwind
+define spir_kernel void @host_kernel(i32 %size, float addrspace(1)* %ptr) #0 {
+entry:
+ %size.addr = alloca i32, align 4
+ %ptr.addr = alloca float addrspace(1)*, align 8
+ %block_with_local = alloca %opencl.block*, align 8
+ %block = alloca %opencl.block*, align 8
+ %captured = alloca <{ float addrspace(1)* }>, align 8
+ %wgSize = alloca i32, align 4
+ %prefMul = alloca i32, align 4
+ %agg.tmp = alloca %struct.ndrange_t, align 8
+ %agg.tmp8 = alloca %struct.ndrange_t, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store float addrspace(1)* %ptr, float addrspace(1)** %ptr.addr, align 8
+ %0 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i32 0, i32 0, i8* null)
+ store %opencl.block* %0, %opencl.block** %block_with_local, align 8
+ %block.captured = getelementptr inbounds <{ float addrspace(1)* }>, <{ float addrspace(1)* }>* %captured, i32 0, i32 0
+ %1 = load float addrspace(1)*, float addrspace(1)** %ptr.addr, align 8
+ store float addrspace(1)* %1, float addrspace(1)** %block.captured, align 8
+ %2 = bitcast <{ float addrspace(1)* }>* %captured to i8*
+ %3 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @__host_kernel_block_invoke_2 to i8*), i32 8, i32 8, i8* %2)
+ store %opencl.block* %3, %opencl.block** %block, align 8
+ %4 = load %opencl.block*, %opencl.block** %block_with_local, align 8
+ %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block* %4)
+ store i32 %call, i32* %wgSize, align 4
+ %5 = load %opencl.block*, %opencl.block** %block_with_local, align 8
+ %call2 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block* %5)
+ store i32 %call2, i32* %prefMul, align 4
+ %call3 = call spir_func %opencl.queue_t* @_Z17get_default_queuev()
+ call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp, i64 1)
+ %6 = load %opencl.block*, %opencl.block** %block_with_local, align 8
+ %7 = load i32, i32* %size.addr, align 4
+ %8 = load i32, i32* %wgSize, align 4
+ %9 = load i32, i32* %prefMul, align 4
+ %mul = mul i32 %8, %9
+ %call4 = call spir_func i32 (%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t* %call3, i32 241, %struct.ndrange_t* byval %agg.tmp, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %6, i32 %7, i32 %mul)
+ %10 = load %opencl.block*, %opencl.block** %block, align 8
+ %call5 = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block* %10)
+ store i32 %call5, i32* %wgSize, align 4
+ %11 = load %opencl.block*, %opencl.block** %block, align 8
+ %call6 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block* %11)
+ store i32 %call6, i32* %prefMul, align 4
+ %call7 = call spir_func %opencl.queue_t* @_Z17get_default_queuev()
+ call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp8, i64 1)
+ %12 = load %opencl.block*, %opencl.block** %block, align 8
+ %call9 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t* %call7, i32 241, %struct.ndrange_t* byval %agg.tmp8, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %12)
+ ret void
+}
+
+; Function Attrs: nounwind
+define internal spir_func void @__host_kernel_block_invoke(i8* %.block_descriptor, i8 addrspace(3)* %ptr0, i8 addrspace(3)* %ptr1) #0 {
+entry:
+ %.block_descriptor.addr = alloca i8*, align 8
+ %ptr0.addr = alloca i8 addrspace(3)*, align 8
+ %ptr1.addr = alloca i8 addrspace(3)*, align 8
+ %block.addr = alloca <{}>*, align 8
+ store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8
+ %0 = load i8*, i8** %.block_descriptor.addr
+ store i8 addrspace(3)* %ptr0, i8 addrspace(3)** %ptr0.addr, align 8
+ store i8 addrspace(3)* %ptr1, i8 addrspace(3)** %ptr1.addr, align 8
+ %block = bitcast i8* %.block_descriptor to <{}>*
+ store <{}>* %block, <{}>** %block.addr, align 8
+ %1 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr0.addr, align 8
+ %2 = bitcast i8 addrspace(3)* %1 to float addrspace(3)*
+ %3 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr1.addr, align 8
+ %4 = bitcast i8 addrspace(3)* %3 to float addrspace(3)*
+ call spir_func void @device_kernel_with_local_args(float addrspace(3)* %2, float addrspace(3)* %4)
+ ret void
+}
+
+declare %opencl.block* @spir_block_bind(i8*, i32, i32, i8*)
+
+; Function Attrs: nounwind
+define internal spir_func void @__host_kernel_block_invoke_2(i8* %.block_descriptor) #0 {
+entry:
+ %.block_descriptor.addr = alloca i8*, align 8
+ %block.addr = alloca <{ float addrspace(1)* }>*, align 8
+ store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8
+ %0 = load i8*, i8** %.block_descriptor.addr
+ %block = bitcast i8* %.block_descriptor to <{ float addrspace(1)* }>*
+ store <{ float addrspace(1)* }>* %block, <{ float addrspace(1)* }>** %block.addr, align 8
+ %block.capture.addr = getelementptr inbounds <{ float addrspace(1)* }>, <{ float addrspace(1)* }>* %block, i32 0, i32 0
+ %1 = load float addrspace(1)*, float addrspace(1)** %block.capture.addr, align 8
+ call spir_func void @device_kernel(float addrspace(1)* %1)
+ ret void
+}
+
+declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block*) #1
+
+declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block*) #1
+
+declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) #1
+
+declare spir_func %opencl.queue_t* @_Z17get_default_queuev() #1
+
+declare spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret, i64) #1
+
+declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*) #1
+
+declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*) #1
+
+declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!opencl.kernels = !{!0}
+!opencl.enable.FP_CONTRACT = !{}
+!opencl.spir.version = !{!6}
+!opencl.ocl.version = !{!7}
+!opencl.used.extensions = !{!8}
+!opencl.used.optional.core.features = !{!8}
+!opencl.compiler.options = !{!8}
+
+!0 = !{void (i32, float addrspace(1)*)* @host_kernel, !1, !2, !3, !4, !5}
+!1 = !{!"kernel_arg_addr_space", i32 0, i32 1}
+!2 = !{!"kernel_arg_access_qual", !"none", !"none"}
+!3 = !{!"kernel_arg_type", !"uint", !"float*"}
+!4 = !{!"kernel_arg_base_type", !"uint", !"float*"}
+!5 = !{!"kernel_arg_type_qual", !"", !""}
+!6 = !{i32 1, i32 2}
+!7 = !{i32 2, i32 0}
+!8 = !{}
diff --git a/test/transcoding/spirv-types.ll b/test/transcoding/spirv-types.ll
index 7539dd7..67e5bca 100644
--- a/test/transcoding/spirv-types.ll
+++ b/test/transcoding/spirv-types.ll
@@ -57,7 +57,7 @@ target triple = "spir-unknown-unknown"
%spirv.Pipe._0 = type opaque ; read_only pipe
%spirv.Pipe._1 = type opaque ; write_only pipe
%spirv.Image._void_0_0_0_0_0_0_0 = type opaque ; read_only image1d_t
-%spirv.Image._int_1_0_0_0_0_0_0 = type opaque ; read_only image2d_t
+%spirv.Image._int_1_0_0_0_0_0_0 = type opaque ; read_only image2d_t
%spirv.Image._uint_2_0_0_0_0_0_0 = type opaque ; read_only image3d_t
%spirv.Image._float_1_1_0_0_0_0_0 = type opaque; read_only image2d_depth_t
%spirv.Image._half_1_0_1_0_0_0_0 = type opaque ; read_only image2d_array_t
@@ -82,17 +82,22 @@ target triple = "spir-unknown-unknown"
; CHECK-SPIRV: 3 FunctionParameter [[IMG1D_WR]] {{[0-9]+}}
; CHECK-SPIRV: 3 FunctionParameter [[IMG2D_RW]] {{[0-9]+}}
-; CHECK-LLVM: define spir_kernel void @foo(
-; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %a,
-; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %b,
-; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c1,
-; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d1,
-; CHECK-LLVM: %opencl.image3d_t addrspace(1)* nocapture %e1,
-; CHECK-LLVM: %opencl.image2d_array_t addrspace(1)* nocapture %f1,
-; CHECK-LLVM: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1,
-; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c2,
-; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d3)
-
+; CHECK-LLVM: define spir_kernel void @foo(
+; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %a,
+; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %b,
+; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c1,
+; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d1,
+; CHECK-LLVM-SAME: %opencl.image3d_t addrspace(1)* nocapture %e1,
+; CHECK-LLVM-SAME: %opencl.image2d_array_t addrspace(1)* nocapture %f1,
+; CHECK-LLVM-SAME: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1,
+; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c2,
+; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d3)
+; CHECK-LLVM-SAME: !kernel_arg_addr_space [[AS:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_type_qual [[TQ:![0-9]+]]
+; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]]
+
; Function Attrs: nounwind readnone
define spir_kernel void @foo(
%spirv.Pipe._0 addrspace(1)* nocapture %a,
@@ -161,11 +166,11 @@ attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-point
!opencl.compiler.options = !{!8}
!llvm.ident = !{!10}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_access_qual", !"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write"}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_base_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"}
-; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type_qual", !"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !""}
+; CHECK-LLVM-DAG: [[AS]] = !{i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1}
+; CHECK-LLVM-DAG: [[AQ]] = !{!"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write"}
+; CHECK-LLVM-DAG: [[TYPE]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"}
+; CHECK-LLVM-DAG: [[BT]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"}
+; CHECK-LLVM-DAG: [[TQ]] = !{!"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !""}
!0 = !{void (%spirv.Pipe._0 addrspace(1)*, %spirv.Pipe._1 addrspace(1)*, %spirv.Image._void_0_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._int_1_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._uint_2_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._half_1_0_1_0_0_0_0 addrspace(1)*, %spirv.Image._float_5_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._void_0_0_0_0_0_0_1 addrspace(1)*, %spirv.Image._void_1_0_0_0_0_0_2 addrspace(1)*)* @foo, !1, !2, !3, !4, !5}
!1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1}
diff --git a/test/vec_type_hint.ll b/test/vec_type_hint.ll
index e3bc812..6afef29 100644
--- a/test/vec_type_hint.ll
+++ b/test/vec_type_hint.ll
@@ -1,15 +1,15 @@
; kernel
; __attribute__((vec_type_hint(float4)))
; void test_float() {}
-;
+;
; kernel
; __attribute__((vec_type_hint(double)))
; void test_double() {}
-;
+;
; kernel
; __attribute__((vec_type_hint(uint4)))
; void test_uint() {}
-;
+;
; kernel
; __attribute__((vec_type_hint(int8)))
; void test_int() {}
@@ -26,33 +26,41 @@
; CHECK-SPIRV: {{[0-9]+}} EntryPoint {{[0-9]+}} {{[0-9]+}} "test_double"
; CHECK-SPIRV: {{[0-9]+}} EntryPoint {{[0-9]+}} {{[0-9]+}} "test_uint"
; CHECK-SPIRV: {{[0-9]+}} EntryPoint {{[0-9]+}} {{[0-9]+}} "test_int"
-; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}}
; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}}
-; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}}
-; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}}
+; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}}
+; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}}
+; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}}
; ModuleID = 'vec_type_hint.cl'
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-unknown"
+; CHECK-LLVM: define spir_kernel void @test_float()
+; CHECK-LLVM-SAME: !vec_type_hint [[VFLOAT:![0-9]+]]
; Function Attrs: nounwind
define spir_kernel void @test_float() #0 {
entry:
ret void
}
+; CHECK-LLVM: define spir_kernel void @test_double()
+; CHECK-LLVM-SAME: !vec_type_hint [[VDOUBLE:![0-9]+]]
; Function Attrs: nounwind
define spir_kernel void @test_double() #0 {
entry:
ret void
}
+; CHECK-LLVM: define spir_kernel void @test_uint()
+; CHECK-LLVM-SAME: !vec_type_hint [[VUINT:![0-9]+]]
; Function Attrs: nounwind
define spir_kernel void @test_uint() #0 {
entry:
ret void
}
+; CHECK-LLVM: define spir_kernel void @test_int()
+; CHECK-LLVM-SAME: !vec_type_hint [[VINT:![0-9]+]]
; Function Attrs: nounwind
define spir_kernel void @test_int() #0 {
entry:
@@ -70,14 +78,10 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
!opencl.compiler.options = !{!15}
!llvm.ident = !{!16}
-; CHECK-LLVM: @test_float
-; CHECK-LLVM: {!"vec_type_hint", <4 x float> undef, i32 1}
-; CHECK-LLVM: @test_double
-; CHECK-LLVM: {!"vec_type_hint", double undef, i32 1}
-; CHECK-LLVM: @test_uint
-; CHECK-LLVM: {!"vec_type_hint", <4 x i32> undef, i32 1}
-; CHECK-LLVM: @test_int
-; CHECK-LLVM: {!"vec_type_hint", <8 x i32> undef, i32 1}
+; CHECK-LLVM: [[VFLOAT]] = !{<4 x float> undef, i32 1}
+; CHECK-LLVM: [[VDOUBLE]] = !{double undef, i32 1}
+; CHECK-LLVM: [[VUINT]] = !{<4 x i32> undef, i32 1}
+; CHECK-LLVM: [[VINT]] = !{<8 x i32> undef, i32 1}
!0 = !{void ()* @test_float, !1, !2, !3, !4, !5, !6}
!1 = !{!"kernel_arg_addr_space"}