From 5d0e2816c2649937ad731f7b72d04311e7fa2e65 Mon Sep 17 00:00:00 2001 From: Alexey Sotkin Date: Mon, 10 Jul 2017 17:39:01 +0300 Subject: Upgrade SPIRV translator from LLVM 3.8 to LLVM 4.0 Change-Id: I28e88c5b38463e14a4af4fc0e49e8b95c8debef0 --- lib/SPIRV/SPIRVReader.cpp | 429 +++++++++++++++------ test/ExecutionMode_SPIR_to_SPIRV.ll | 6 +- test/image_without_access_qualifier.spt | 94 ++--- test/transcoding/check_ro_qualifier.ll | 13 +- test/transcoding/check_wo_qualifier.ll | 13 +- test/transcoding/cl-types.ll | 37 +- test/transcoding/device_execution.ll | 5 +- .../device_execution_multiple_blocks.ll | 28 +- test/transcoding/device_execution_overloading.ll | 12 +- .../device_execution_simple_local_memory.ll | 12 +- test/transcoding/device_execution_vaargs.ll | 231 +++++++++++ test/transcoding/spirv-types.ll | 39 +- test/vec_type_hint.ll | 32 +- 13 files changed, 709 insertions(+), 242 deletions(-) create mode 100644 test/transcoding/device_execution_vaargs.ll diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp index efc8952..808ea58 100644 --- a/lib/SPIRV/SPIRVReader.cpp +++ b/lib/SPIRV/SPIRVReader.cpp @@ -124,12 +124,12 @@ dumpLLVM(Module *M, const std::string &FName) { } static MDNode* -getMDNodeStringIntVec(LLVMContext *Context, const std::string& Str, - const std::vector& IntVals) { +getMDNodeStringIntVec(LLVMContext *Context, + const std::vector& IntVals) { std::vector ValueVec; - ValueVec.push_back(MDString::get(*Context, Str)); for (auto &I:IntVals) - ValueVec.push_back(ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(*Context), I))); + ValueVec.push_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(*Context), I))); return MDNode::get(*Context, ValueVec); } @@ -168,15 +168,14 @@ addNamedMetadataStringSet(LLVMContext *Context, Module *M, } static void -addOCLKernelArgumentMetadata(LLVMContext *Context, - std::vector &KernelMD, const std::string &MDName, - SPIRVFunction *BF, std::functionFunc){ +addOCLKernelArgumentMetadata(LLVMContext *Context, const std::string &MDName, + SPIRVFunction *BF, llvm::Function *Fn, + std::functionFunc) { std::vector ValueVec; - ValueVec.push_back(MDString::get(*Context, MDName)); BF->foreachArgument([&](SPIRVFunctionParameter *Arg) { ValueVec.push_back(Func(Arg)); }); - KernelMD.push_back(MDNode::get(*Context, ValueVec)); + Fn->setMetadata(MDName, MDNode::get(*Context, ValueVec)); } class SPIRVToLLVMDbgTran { @@ -297,12 +296,17 @@ public: bool CreatePlaceHolder = true); Value *transValueWithoutDecoration(SPIRVValue *, Function *F, BasicBlock *, bool CreatePlaceHolder = true); + Value *transDeviceEvent(SPIRVValue *BV, Function *F, BasicBlock *BB); + Value *transEnqueuedBlock(SPIRVValue *BF, SPIRVValue *BC, SPIRVValue *BCSize, + SPIRVValue *BCAligment, Function *F, BasicBlock *BB); bool transDecoration(SPIRVValue *, Value *); bool transAlign(SPIRVValue *, Value *); Instruction *transOCLBuiltinFromExtInst(SPIRVExtInst *BC, BasicBlock *BB); std::vector transValue(const std::vector&, Function *F, BasicBlock *); Function *transFunction(SPIRVFunction *F); + Instruction *transEnqueueKernelBI(SPIRVInstruction *BI, BasicBlock *BB); + Instruction *transWGSizeBI(SPIRVInstruction *BI, BasicBlock *BB); bool transFPContractMetadata(); bool transKernelMetadata(); bool transNonTemporalMetadata(Instruction *I); @@ -328,13 +332,6 @@ public: /// need to be post-processed to return the struct through sret argument. bool postProcessOCLBuiltinReturnStruct(Function *F); - /// \brief Post-process OpenCL builtin functions having block argument. - /// - /// These functions are translated to functions with function pointer type - /// argument first, then post-processed to have block argument. - bool postProcessOCLBuiltinWithFuncPointer(Function *F, - Function::arg_iterator I); - /// \brief Post-process OpenCL builtin functions having array argument. /// /// These functions are translated to functions with array type argument @@ -387,6 +384,7 @@ public: typedef DenseMap SPIRVToLLVMTypeMap; typedef DenseMap SPIRVToLLVMValueMap; + typedef DenseMap SPIRVBlockToLLVMStructMap; typedef DenseMap SPIRVToLLVMFunctionMap; typedef DenseMap BuiltinVarMap; @@ -402,6 +400,7 @@ private: SPIRVToLLVMTypeMap TypeMap; SPIRVToLLVMValueMap ValueMap; SPIRVToLLVMFunctionMap FuncMap; + SPIRVBlockToLLVMStructMap BlockMap; SPIRVToLLVMPlaceholderMap PlaceholderMap; SPIRVToLLVMDbgTran DbgTran; @@ -781,6 +780,7 @@ SPIRVToLLVM::transType(SPIRVType *T, bool IsClassMember) { return 0; } + std::string SPIRVToLLVM::transTypeToOCLTypeName(SPIRVType *T, bool IsSigned) { switch(T->getOpCode()) { @@ -906,30 +906,30 @@ void SPIRVToLLVM::setLLVMLoopMetadata(SPIRVLoopMerge* LM, BranchInst* BI) { if (!LM) return; - llvm::MDString *Name = nullptr; - auto Temp = MDNode::getTemporary(*Context, None); - auto Self = MDNode::get(*Context, Temp); - Self->replaceOperandWith(0, Self); - MDNode::deleteTemporary(Temp); - - if (LM->getLoopControl() == LoopControlMaskNone) { - BI->setMetadata("llvm.loop", Self); - return; - } - else if(LM->getLoopControl() == LoopControlUnrollMask) - Name = llvm::MDString::get(*Context, "llvm.loop.unroll.full"); - else if (LM->getLoopControl() == LoopControlDontUnrollMask) - Name = llvm::MDString::get(*Context, "llvm.loop.unroll.disable"); - else - return; - - std::vector OpValues(1, Name); - SmallVector Metadata; - Metadata.push_back(llvm::MDNode::get(*Context, Self)); - Metadata.push_back(llvm::MDNode::get(*Context, OpValues)); - - llvm::MDNode *Node = llvm::MDNode::get(*Context, Metadata); - Node->replaceOperandWith(0, Node); + llvm::MDString *Name = nullptr; + auto Temp = MDNode::getTemporary(*Context, None); + auto Self = MDNode::get(*Context, Temp); + Self->replaceOperandWith(0, Self); + MDNode::deleteTemporary(Temp); + + if (LM->getLoopControl() == LoopControlMaskNone) { + BI->setMetadata("llvm.loop", Self); + return; + } + else if(LM->getLoopControl() == LoopControlUnrollMask) + Name = llvm::MDString::get(*Context, "llvm.loop.unroll.full"); + else if (LM->getLoopControl() == LoopControlDontUnrollMask) + Name = llvm::MDString::get(*Context, "llvm.loop.unroll.disable"); + else + return; + + std::vector OpValues(1, Name); + SmallVector Metadata; + Metadata.push_back(llvm::MDNode::get(*Context, Self)); + Metadata.push_back(llvm::MDNode::get(*Context, OpValues)); + + llvm::MDNode *Node = llvm::MDNode::get(*Context, Metadata); + Node->replaceOperandWith(0, Node); BI->setMetadata("llvm.loop", Node); } @@ -968,6 +968,19 @@ SPIRVToLLVM::transValue(SPIRVValue *BV, Function *F, BasicBlock *BB, return V; } +Value * +SPIRVToLLVM::transDeviceEvent(SPIRVValue *BV, Function *F, BasicBlock *BB) { + auto Val = transValue(BV, F, BB, false); + auto Ty = dyn_cast(Val->getType()); + assert(Ty && "Invalid Device Event"); + if (Ty->getAddressSpace() == SPIRAS_Generic) + return Val; + + IRBuilder<> Builder(BB); + auto EventTy = PointerType::get(Ty->getElementType(), SPIRAS_Generic); + return Builder.CreateAddrSpaceCast(Val, EventTy); +} + Value * SPIRVToLLVM::transConvertInst(SPIRVValue* BV, Function* F, BasicBlock* BB) { SPIRVUnary* BC = static_cast(BV); @@ -1057,16 +1070,6 @@ SPIRVToLLVM::postProcessOCL() { } } } - for (auto I = M->begin(), E = M->end(); I != E;) { - auto F = I++; - if (F->hasName() && F->isDeclaration()) { - DEBUG(dbgs() << "[postProcessOCL func ptr] " << *F << '\n'); - auto AI = F->arg_begin(); - if (hasFunctionPointerArg(&(*F), AI) && isDecoratedSPIRVFunc(&(*F))) - if (!postProcessOCLBuiltinWithFuncPointer(&(*F), AI)) - return false; - } - } for (auto I = M->begin(), E = M->end(); I != E;) { auto F = I++; if (F->hasName() && F->isDeclaration()) { @@ -1106,40 +1109,6 @@ SPIRVToLLVM::postProcessOCLBuiltinReturnStruct(Function *F) { return true; } -bool -SPIRVToLLVM::postProcessOCLBuiltinWithFuncPointer(Function* F, - Function::arg_iterator I) { - auto Name = undecorateSPIRVFunction(F->getName()); - std::set InvokeFuncPtrs; - mutateFunctionOCL (F, [=, &InvokeFuncPtrs]( - CallInst *CI, std::vector &Args) { - auto ALoc = std::find_if(Args.begin(), Args.end(), [](Value * elem) { - return isFunctionPointerType(elem->getType()); - }); - assert(ALoc != Args.end() && "Buit-in must accept a pointer to function"); - assert(isa(*ALoc) && "Invalid function pointer usage"); - Value *Ctx = ALoc[1]; - Value *CtxLen = ALoc[2]; - Value *CtxAlign = ALoc[3]; - if (Name == kOCLBuiltinName::EnqueueKernel) - assert(Args.end() - ALoc > 3); - else - assert(Args.end() - ALoc > 0); - // Erase arguments what are hanled by "spir_block_bind" according to SPIR 2.0 - Args.erase(ALoc + 1, ALoc + 4); - - InvokeFuncPtrs.insert(*ALoc); - // There will be as many calls to spir_block_bind as how much device execution - // bult-ins using this block. This doesn't contradict SPIR 2.0 specification. - *ALoc = addBlockBind(M, cast(removeCast(*ALoc)), - Ctx, CtxLen, CtxAlign, CI); - return Name; - }); - for (auto &I:InvokeFuncPtrs) - eraseIfNoUse(I); - return true; -} - bool SPIRVToLLVM::postProcessOCLBuiltinWithArrayArguments(Function* F, const std::string &DemangledName) { @@ -1482,8 +1451,8 @@ SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, else if (LinkageTy == GlobalValue::CommonLinkage) // In LLVM variables with common linkage type must be initilized by 0 Initializer = Constant::getNullValue(Ty); - else if (BVar->getStorageClass() == SPIRVStorageClassKind::StorageClassWorkgroup) - Initializer = dyn_cast(UndefValue::get(Ty)); + else if (BVar->getStorageClass() == SPIRVStorageClassKind::StorageClassWorkgroup) + Initializer = dyn_cast(UndefValue::get(Ty)); SPIRVStorageClassKind BS = BVar->getStorageClass(); if (BS == StorageClassFunction && !Init) { @@ -1955,7 +1924,13 @@ SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, case OpSignBitSet : return mapValue(BV, transOCLRelational(static_cast(BV), BB)); - + case OpEnqueueKernel : + return mapValue(BV, transEnqueueKernelBI( + static_cast(BV), BB)); + case OpGetKernelWorkGroupSize : + case OpGetKernelPreferredWorkGroupSizeMultiple : + return mapValue(BV, transWGSizeBI( + static_cast(BV), BB)); default: { auto OC = BV->getOpCode(); if (isSPIRVCmpInstTransToLLVMInst(static_cast(BV))) { @@ -2129,6 +2104,249 @@ SPIRVToLLVM::transOCLBuiltinPostproc(SPIRVInstruction* BI, return CI; } +static void adaptBlockInvoke(Function *Invoke, Type *BlockStructTy) { + // As first argument block invoke takes a pointer to captured data. + // We pass to block invoke whole block structure, not only captured data + // as it expected. So we need to update original function to unpack expected + // captured data and use it instead of an original argument + // + // %block = bitcast i8 addrspace(4)* to <{ ..., [X x i8] }> addrspace(4)* + // %block.1 = addrspacecast %block to <{ ..., [X x i8] }>* + // %captured = getelementptr <{ ..., [X x i8] }>, i32 0, i32 5 + // %captured.1 = bitcast %captured to i8* + + BasicBlock *BB = &(Invoke->getEntryBlock()); + BB->splitBasicBlock(BB->begin(), "invoke"); + auto FirstArg = &*(Invoke->arg_begin()); + IRBuilder<> Builder(BB, BB->begin()); + + auto FirstArgTy = dyn_cast(FirstArg->getType()); + assert(FirstArgTy && "Expects that first argument of invoke is a pointer"); + unsigned FirstArgAS = FirstArgTy->getAddressSpace(); + + auto Int8PtrTy = + Type::getInt8PtrTy(Invoke->getParent()->getContext(), FirstArgAS); + auto BlockStructPtrTy = PointerType::get(BlockStructTy, FirstArgAS); + + auto Int32Ty = Type::getInt32Ty(Invoke->getParent()->getContext()); + Value *CapturedGEPIndices[2] = { ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, 5) }; + auto BlockToStructCast = + Builder.CreateBitCast(FirstArg, BlockStructPtrTy, "block"); + auto CapturedGEP = Builder.CreateGEP(BlockToStructCast, CapturedGEPIndices); + auto CapturedToInt8Cast = Builder.CreateBitCast(CapturedGEP, Int8PtrTy); + + FirstArg->replaceUsesOutsideBlock(CapturedToInt8Cast, BB); +} + +static Type *getOrCreateBlockDescTy(Module *M) { + // Get or create block descriptor type which contains block size + // in the last element: %struct.__block_descriptor = type { i64, i64 } + auto BlockDescTy = M->getTypeByName("struct.__block_descriptor"); + if (BlockDescTy) + return BlockDescTy; + + auto Int64Ty = Type::getInt64Ty(M->getContext()); + Type *BlockDescElements[2] = {/*Reserved*/ Int64Ty, /*Block size*/ Int64Ty }; + return StructType::create(M->getContext(), BlockDescElements, + "struct.__block_descriptor"); +} + +Value * +SPIRVToLLVM::transEnqueuedBlock(SPIRVValue *SInvoke, SPIRVValue *SCaptured, + SPIRVValue *SCaptSize, SPIRVValue *SCaptAlignment, + Function *LBI, BasicBlock *LBB) { + // Search if that block have been already translated + auto Loc = BlockMap.find(SInvoke); + if (Loc != BlockMap.end()) + return Loc->second; + + IRBuilder<> Builder(LBB); + const DataLayout &DL = M->getDataLayout(); + + // Translate block and its arguments from SPIRV values to LLVM + auto LInvoke = transFunction(static_cast(SInvoke)); + auto LCaptured = transValue(SCaptured, LBI, LBB, false); + auto LCaptSize = + dyn_cast(transValue(SCaptSize, LBI, LBB, false)); + auto LCaptAlignment = + dyn_cast(transValue(SCaptAlignment, LBI, LBB, false)); + + // Create basic types + auto Int8Ty = Type::getInt8Ty(*Context); + auto Int32Ty = Type::getInt32Ty(*Context); + auto Int8PtrTy = Type::getInt8PtrTy(*Context, SPIRAS_Private); + auto Int8PtrTyGen = Type::getInt8PtrTy(*Context, SPIRAS_Generic); + auto BlockDescTy = getOrCreateBlockDescTy(M); + auto BlockDescPtrTy = BlockDescTy->getPointerTo(SPIRAS_Private); + + // Create a block as structure: + // <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }> + SmallVector BlockEls = + { /*isa*/ Int8PtrTy, /*flags*/ Int32Ty, /*reserved*/ Int32Ty, + /*invoke*/ Int8PtrTy, /*block_descriptor*/ BlockDescPtrTy }; + + // Add captured if any + // <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [X x i8] }> + // Note: captured data stored in structure as array of char + if (LCaptSize->getZExtValue() > 0) + BlockEls.push_back(ArrayType::get(Int8Ty, LCaptSize->getZExtValue())); + + auto BlockTy = StructType::get(*Context, BlockEls, /*isPacked*/ true); + + // Allocate block on the stack, then store data to it + auto BlockAlloca = Builder.CreateAlloca(BlockTy, nullptr, "block"); + BlockAlloca->setAlignment(DL.getPrefTypeAlignment(BlockTy)); + + auto getIndices = [Int32Ty](int a, int b) -> SmallVector { + return { ConstantInt::get(Int32Ty, a), ConstantInt::get(Int32Ty, b) }; + }; + + // 1. isa, flags and reserved fields isn't used in current implementation + // Fill them the same way as clang does + auto IsaGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 0)); + Builder.CreateStore(ConstantPointerNull::get(Int8PtrTy), IsaGEP); + auto FlagsGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 1)); + Builder.CreateStore(ConstantInt::get(Int32Ty, 1342177280), FlagsGEP); + auto ReservedGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 2)); + Builder.CreateStore(ConstantInt::get(Int32Ty, 0), ReservedGEP); + + // 2. Store pointer to block invoke to the structure + auto InvokeCast = Builder.CreateBitCast(LInvoke, Int8PtrTy, "invoke"); + auto InvokeGEP = Builder.CreateGEP(BlockAlloca, getIndices(0, 3)); + Builder.CreateStore(InvokeCast, InvokeGEP); + + // 3. Create and store a pointer to the block descriptor global value + uint64_t SizeOfBlock = DL.getTypeAllocSize(BlockTy); + + auto Int64Ty = Type::getInt64Ty(*Context); + Constant *BlockDescEls[2] = { ConstantInt::get(Int64Ty, 0), + ConstantInt::get(Int64Ty, SizeOfBlock) }; + auto BlockDesc = + ConstantStruct::get(dyn_cast(BlockDescTy), BlockDescEls); + + auto BlockDescGV = + new GlobalVariable(*M, BlockDescTy, true, GlobalValue::InternalLinkage, + BlockDesc, "__block_descriptor_spirv"); + auto BlockDescGEP = + Builder.CreateGEP(BlockAlloca, getIndices(0, 4), "block.descriptor"); + Builder.CreateStore(BlockDescGV, BlockDescGEP); + + // 4. Copy captured data to the structure + if (LCaptSize->getZExtValue() > 0) { + auto CapturedGEP = + Builder.CreateGEP(BlockAlloca, getIndices(0, 5), "block.captured"); + auto CapturedGEPCast = Builder.CreateBitCast(CapturedGEP, Int8PtrTy); + + // We can't make any guesses about type of captured data, so + // let's copy it through memcpy + std::string MemCpyName = "llvm.memcpy.p0i8.p0i8"; + MemCpyName += (LCaptSize->getType()->getBitWidth() == 32) ? ".i32" : ".i64"; + SmallVector MemCpyArgTys = { + Int8PtrTy, Int8PtrTy, // src and dst pointers + LCaptSize->getType(), // size + Int32Ty, // alignment + Type::getInt1Ty(*Context) // isVolatile + }; + + FunctionType *MemCpyTy = + FunctionType::get(Type::getVoidTy(*Context), MemCpyArgTys, false); + Function *MemCpy = + dyn_cast(M->getOrInsertFunction(MemCpyName, MemCpyTy)); + assert(MemCpy && "Can't create memcpy intrinsic"); + MemCpy->setLinkage(GlobalValue::ExternalLinkage); + SmallVector MemCpyArgs = + { CapturedGEPCast, LCaptured, LCaptSize, LCaptAlignment, + ConstantInt::get(Type::getInt1Ty(*Context), SCaptured->isVolatile()) }; + Builder.CreateCall(MemCpy, MemCpyArgs); + + // Fix invoke function to correctly process its first argument + adaptBlockInvoke(LInvoke, BlockTy); + } + auto BlockCast = Builder.CreateBitCast(BlockAlloca, Int8PtrTy); + auto BlockCastGen = Builder.CreateAddrSpaceCast(BlockCast, Int8PtrTyGen); + BlockMap[SInvoke] = BlockCastGen; + return BlockCastGen; +} + +Instruction * +SPIRVToLLVM::transEnqueueKernelBI(SPIRVInstruction *BI, BasicBlock *BB) { + Type *IntTy = Type::getInt32Ty(*Context); + + // Find or create enqueue kernel BI declaration + auto Ops = BI->getOperands(); + bool hasVaargs = Ops.size() > 10; + + std::string FName = hasVaargs ? "__enqueue_kernel_events_vaargs" + : "__enqueue_kernel_basic_events"; + Function* F = M->getFunction(FName); + if (!F) { + Type *EventTy = PointerType::get( + getOrCreateOpaquePtrType(M, SPIR_TYPE_NAME_CLK_EVENT_T, SPIRAS_Private), + SPIRAS_Generic); + + SmallVector Tys = { + transType(Ops[0]->getType()), // queue + IntTy, // flags + transType(Ops[2]->getType()), // ndrange + IntTy, EventTy, EventTy, // events + Type::getInt8PtrTy(*Context, SPIRAS_Generic) // block + }; + if (hasVaargs) + Tys.push_back(IntTy); // Number of variadics if any + + FunctionType* FT = FunctionType::get(IntTy, Tys, hasVaargs); + F = Function::Create(FT, GlobalValue::ExternalLinkage, FName, M); + if (isFuncNoUnwind()) + F->addFnAttr(Attribute::NoUnwind); + } + + // Create call to enqueue kernel BI + SmallVector Args = { + transValue(Ops[0], F, BB, false), // queue + transValue(Ops[1], F, BB, false), // flags + transValue(Ops[2], F, BB, false), // ndrange + transValue(Ops[3], F, BB, false), // events number + transDeviceEvent(Ops[4], F, BB), // event_wait_list + transDeviceEvent(Ops[5], F, BB), // event_ret + transEnqueuedBlock(Ops[6], Ops[7], Ops[8], Ops[9], F, BB) // block + }; + + if (hasVaargs) { + Args.push_back(ConstantInt::get(IntTy, Ops.size() - 10)); // Number of vaargs + for (unsigned i = 10; i < Ops.size(); ++i) + Args.push_back(transValue(Ops[i], F, BB, false)); + } + auto Call = CallInst::Create(F, Args, "", BB); + setName(Call, BI); + setAttrByCalledFunc(Call); + return Call; +} + +Instruction * +SPIRVToLLVM::transWGSizeBI(SPIRVInstruction *BI, BasicBlock *BB) { + std::string FName = + (BI->getOpCode() == OpGetKernelWorkGroupSize) + ? "__get_kernel_work_group_size_impl" + : "__get_kernel_preferred_work_group_multiple_impl"; + + Function* F = M->getFunction(FName); + if (!F) { + auto Int8PtrTyGen = Type::getInt8PtrTy(*Context, SPIRAS_Generic); + FunctionType* FT = + FunctionType::get(Type::getInt32Ty(*Context), Int8PtrTyGen, false); + F = Function::Create(FT, GlobalValue::ExternalLinkage, FName, M); + if (isFuncNoUnwind()) + F->addFnAttr(Attribute::NoUnwind); + } + auto Ops = BI->getOperands(); + auto Block = transEnqueuedBlock(Ops[0], Ops[1], Ops[2], Ops[3], F, BB); + auto Call = CallInst::Create(F, Block, "", BB); + setName(Call, BI); + setAttrByCalledFunc(Call); + return Call; +} + Instruction * SPIRVToLLVM::transBuiltinFromInst(const std::string& FuncName, SPIRVInstruction* BI, BasicBlock* BB) { @@ -2378,19 +2596,15 @@ SPIRVToLLVM::transNonTemporalMetadata(Instruction *I) { bool SPIRVToLLVM::transKernelMetadata() { - NamedMDNode *KernelMDs = M->getOrInsertNamedMetadata(SPIR_MD_KERNELS); for (unsigned I = 0, E = BM->getNumFunctions(); I != E; ++I) { SPIRVFunction *BF = BM->getFunction(I); Function *F = static_cast(getTranslatedValue(BF)); assert(F && "Invalid translated function"); if (F->getCallingConv() != CallingConv::SPIR_KERNEL) continue; - std::vector KernelMD; - KernelMD.push_back(ValueAsMetadata::get(F)); // Generate metadata for kernel_arg_address_spaces - addOCLKernelArgumentMetadata(Context, KernelMD, - SPIR_MD_KERNEL_ARG_ADDR_SPACE, BF, + addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_ADDR_SPACE, BF, F, [=](SPIRVFunctionParameter *Arg){ SPIRVType *ArgTy = Arg->getType(); SPIRAddressSpace AS = SPIRAS_Private; @@ -2402,9 +2616,8 @@ SPIRVToLLVM::transKernelMetadata() { ConstantInt::get(Type::getInt32Ty(*Context), AS)); }); // Generate metadata for kernel_arg_access_qual - addOCLKernelArgumentMetadata(Context, KernelMD, - SPIR_MD_KERNEL_ARG_ACCESS_QUAL, BF, - [=](SPIRVFunctionParameter *Arg){ + addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_ACCESS_QUAL, BF, F, + [=](SPIRVFunctionParameter *Arg){ std::string Qual; auto T = Arg->getType(); if (T->isTypeOCLImage()) { @@ -2418,14 +2631,12 @@ SPIRVToLLVM::transKernelMetadata() { return MDString::get(*Context, Qual); }); // Generate metadata for kernel_arg_type - addOCLKernelArgumentMetadata(Context, KernelMD, - SPIR_MD_KERNEL_ARG_TYPE, BF, + addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_TYPE, BF, F, [=](SPIRVFunctionParameter *Arg){ return transOCLKernelArgTypeName(Arg); }); // Generate metadata for kernel_arg_type_qual - addOCLKernelArgumentMetadata(Context, KernelMD, - SPIR_MD_KERNEL_ARG_TYPE_QUAL, BF, + addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_TYPE_QUAL, BF, F, [=](SPIRVFunctionParameter *Arg){ std::string Qual; if (Arg->hasDecorate(DecorationVolatile)) @@ -2451,8 +2662,7 @@ SPIRVToLLVM::transKernelMetadata() { return MDString::get(*Context, Qual); }); // Generate metadata for kernel_arg_base_type - addOCLKernelArgumentMetadata(Context, KernelMD, - SPIR_MD_KERNEL_ARG_BASE_TYPE, BF, + addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_BASE_TYPE, BF, F, [=](SPIRVFunctionParameter *Arg){ return transOCLKernelArgTypeName(Arg); }); @@ -2463,37 +2673,32 @@ SPIRVToLLVM::transKernelMetadata() { ArgHasName &= !Arg->getName().empty(); }); if (ArgHasName) - addOCLKernelArgumentMetadata(Context, KernelMD, - SPIR_MD_KERNEL_ARG_NAME, BF, + addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_NAME, BF, F, [=](SPIRVFunctionParameter *Arg){ return MDString::get(*Context, Arg->getName()); }); } // Generate metadata for reqd_work_group_size if (auto EM = BF->getExecutionMode(ExecutionModeLocalSize)) { - KernelMD.push_back(getMDNodeStringIntVec(Context, - kSPIR2MD::WGSize, EM->getLiterals())); + F->setMetadata(kSPIR2MD::WGSize, + getMDNodeStringIntVec(Context, EM->getLiterals())); } // Generate metadata for work_group_size_hint if (auto EM = BF->getExecutionMode(ExecutionModeLocalSizeHint)) { - KernelMD.push_back(getMDNodeStringIntVec(Context, - kSPIR2MD::WGSizeHint, EM->getLiterals())); + F->setMetadata(kSPIR2MD::WGSizeHint, + getMDNodeStringIntVec(Context, EM->getLiterals())); } // Generate metadata for vec_type_hint if (auto EM = BF->getExecutionMode(ExecutionModeVecTypeHint)) { std::vector MetadataVec; - MetadataVec.push_back(MDString::get(*Context, kSPIR2MD::VecTyHint)); Type *VecHintTy = decodeVecTypeHint(*Context, EM->getLiterals()[0]); assert(VecHintTy); MetadataVec.push_back(ValueAsMetadata::get(UndefValue::get(VecHintTy))); MetadataVec.push_back( ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(*Context), 1))); - KernelMD.push_back(MDNode::get(*Context, MetadataVec)); + F->setMetadata(kSPIR2MD::VecTyHint, MDNode::get(*Context, MetadataVec)); } - - llvm::MDNode *Node = MDNode::get(*Context, KernelMD); - KernelMDs->addOperand(Node); } return true; } diff --git a/test/ExecutionMode_SPIR_to_SPIRV.ll b/test/ExecutionMode_SPIR_to_SPIRV.ll index b8b2bb8..960b7f5 100644 --- a/test/ExecutionMode_SPIR_to_SPIRV.ll +++ b/test/ExecutionMode_SPIR_to_SPIRV.ll @@ -15,11 +15,9 @@ ; LLVM => SPIRV => LLVM checks ; CHECK-LLVM: define spir_kernel void @worker() -; CHECK-LLVM: !opencl.kernels = !{![[KERNEL_MD:[0-9]+]]} - -; CHECK-LLVM-DAG: ![[EXEC_MODE_MD:[0-9]+]] = !{!"work_group_size_hint", i32 128, i32 10, i32 1} -; CHECK-LLVM-DAG: ![[KERNEL_MD]] = !{void ()* @worker, {{.*}}![[EXEC_MODE_MD]]{{.*}}} +; CHECK-LLVM-SAME: !work_group_size_hint [[WG:![0-9]+]] +; CHECK-LLVM: [[WG]] = !{i32 128, i32 10, i32 1} target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir-unknown-unknown" diff --git a/test/image_without_access_qualifier.spt b/test/image_without_access_qualifier.spt index fe88e76..98c9dbf 100644 --- a/test/image_without_access_qualifier.spt +++ b/test/image_without_access_qualifier.spt @@ -1,13 +1,13 @@ -119734787 65536 393230 34 0 -2 Capability Addresses -2 Capability Linkage -2 Capability Kernel -2 Capability Int64 -2 Capability ImageBasic +119734787 65536 393230 34 0 +2 Capability Addresses +2 Capability Linkage +2 Capability Kernel +2 Capability Int64 +2 Capability ImageBasic 5 ExtInstImport 1 "OpenCL.std" -3 MemoryModel 2 2 +3 MemoryModel 2 2 6 EntryPoint 6 13 "image_test" -3 Source 3 200000 +3 Source 3 200000 11 Name 5 "__spirv_BuiltInGlobalInvocationId" 4 Name 14 "srcImg" 4 Name 15 "samp" @@ -22,51 +22,51 @@ 7 Name 29 "TempSampledImage" 5 Name 30 "call4.old" 5 Name 33 "arrayidx" -4 Decorate 5 BuiltIn 28 -3 Decorate 5 Constant -13 Decorate 5 LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import -4 TypeInt 2 64 0 -4 TypeInt 9 32 0 -4 TypeVector 3 2 3 -4 TypePointer 4 0 3 -2 TypeVoid 6 -9 TypeImage 7 6 1 0 0 0 0 0 -2 TypeSampler 8 -4 TypeVector 10 9 4 -4 TypePointer 11 5 10 -6 TypeFunction 12 6 7 8 11 -4 TypeVector 23 9 2 -3 TypeSampledImage 28 7 -3 TypeFloat 31 32 -4 Variable 4 5 0 -3 Undef 23 24 -4 Constant 31 32 0 +4 Decorate 5 BuiltIn 28 +3 Decorate 5 Constant +13 Decorate 5 LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import +4 TypeInt 2 64 0 +4 TypeInt 9 32 0 +4 TypeVector 3 2 3 +4 TypePointer 4 0 3 +2 TypeVoid 6 +9 TypeImage 7 6 1 0 0 0 0 0 +2 TypeSampler 8 +4 TypeVector 10 9 4 +4 TypePointer 11 5 10 +6 TypeFunction 12 6 7 8 11 +4 TypeVector 23 9 2 +3 TypeSampledImage 28 7 +3 TypeFloat 31 32 +4 Variable 4 5 0 +3 Undef 23 24 +4 Constant 31 32 0 -5 Function 6 13 0 12 -3 FunctionParameter 7 14 -3 FunctionParameter 8 15 -3 FunctionParameter 11 16 +5 Function 6 13 0 12 +3 FunctionParameter 7 14 +3 FunctionParameter 8 15 +3 FunctionParameter 11 16 -2 Label 17 -4 Load 3 18 5 -5 CompositeExtract 2 19 18 0 -4 Load 3 20 5 -5 CompositeExtract 2 21 20 1 -4 UConvert 9 22 19 -6 CompositeInsert 23 25 22 24 0 -4 UConvert 9 26 21 -6 CompositeInsert 23 27 26 25 1 -5 SampledImage 28 29 14 15 -7 ImageSampleExplicitLod 10 30 29 27 2 32 -5 InBoundsPtrAccessChain 11 33 16 19 -5 Store 33 30 2 16 -1 Return +2 Label 17 +4 Load 3 18 5 +5 CompositeExtract 2 19 18 0 +4 Load 3 20 5 +5 CompositeExtract 2 21 20 1 +4 UConvert 9 22 19 +6 CompositeInsert 23 25 22 24 0 +4 UConvert 9 26 21 +6 CompositeInsert 23 27 26 25 1 +5 SampledImage 28 29 14 15 +7 ImageSampleExplicitLod 10 30 29 27 2 32 +5 InBoundsPtrAccessChain 11 33 16 19 +5 Store 33 30 2 16 +1 Return -1 FunctionEnd +1 FunctionEnd ; RUN: llvm-spirv %s -to-binary -o %t.spv ; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM ; CHECK-LLVM: %opencl.image2d_t = type opaque -; CHECK-LLVM: !{{[0-9]*}} = !{!"kernel_arg_access_qual", !"read_only", !"none", !"none"} +; CHECK-LLVM: !{{[0-9]*}} = !{!"read_only", !"none", !"none"} diff --git a/test/transcoding/check_ro_qualifier.ll b/test/transcoding/check_ro_qualifier.ll index 8096e8c..1092f15 100644 --- a/test/transcoding/check_ro_qualifier.ll +++ b/test/transcoding/check_ro_qualifier.ll @@ -5,13 +5,18 @@ ; CHECK-LLVM: opencl.image2d_array_ro_t = type opaque ; CHECK-LLVM: define spir_kernel void @sample_kernel(%opencl.image2d_array_ro_t addrspace(1) +; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]] + ; CHECK-LLVM: call spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1) ; CHECK-LLVM: call spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1) ; CHECK-LLVM: declare spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1) ; CHECK-LLVM: declare spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_ro_t(%opencl.image2d_array_ro_t addrspace(1) -; CHECK-LLVM: !{{[0-9]+}} = !{void (%opencl.image2d_array_ro_t -; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_type", !"image2d_array_ro_t"} -; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_base_type", !"image2d_array_ro_t"} + +; CHECK-LLVM-DAG: [[AQ]] = !{!"read_only"} +; CHECK-LLVM-DAG: [[TYPE]] = !{!"image2d_array_ro_t"} +; CHECK-LLVM-DAG: [[BT]] = !{!"image2d_array_ro_t"} ; ModuleID = 'out.ll' target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" @@ -58,4 +63,4 @@ attributes #0 = { nounwind } !7 = !{i32 1, i32 2} !8 = !{} !9 = !{!"cl_images"} -!10 = !{i16 6, i16 14} \ No newline at end of file +!10 = !{i16 6, i16 14} diff --git a/test/transcoding/check_wo_qualifier.ll b/test/transcoding/check_wo_qualifier.ll index 1f85d43..217070e 100644 --- a/test/transcoding/check_wo_qualifier.ll +++ b/test/transcoding/check_wo_qualifier.ll @@ -5,14 +5,17 @@ ; CHECK-LLVM: opencl.image2d_array_wo_t = type opaque ; CHECK-LLVM: define spir_kernel void @sample_kernel(%opencl.image2d_array_wo_t addrspace(1) +; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]] + ; CHECK-LLVM: call spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1) ; CHECK-LLVM: call spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1) ; CHECK-LLVM: declare spir_func <2 x i32> @_Z13get_image_dimPU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1) ; CHECK-LLVM: declare spir_func i64 @_Z20get_image_array_sizePU3AS125opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1) -; CHECK-LLVM: !{{[0-9]+}} = !{void (%opencl.image2d_array_wo_t -; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_type", !"image2d_array_wo_t"} -; CHECK-LLVM: !{{[0-9]+}} = !{!"kernel_arg_base_type", !"image2d_array_wo_t"} - +; CHECK-LLVM-DAG: [[AQ]] = !{!"write_only"} +; CHECK-LLVM-DAG: [[TYPE]] = !{!"image2d_array_wo_t"} +; CHECK-LLVM-DAG: [[BT]] = !{!"image2d_array_wo_t"} ; ModuleID = 'out.ll' target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" @@ -59,4 +62,4 @@ attributes #0 = { nounwind } !7 = !{i32 1, i32 2} !8 = !{} !9 = !{!"cl_images"} -!10 = !{i16 6, i16 14} \ No newline at end of file +!10 = !{i16 6, i16 14} diff --git a/test/transcoding/cl-types.ll b/test/transcoding/cl-types.ll index 6c5a5bd..b466560 100644 --- a/test/transcoding/cl-types.ll +++ b/test/transcoding/cl-types.ll @@ -72,17 +72,22 @@ target triple = "spir-unknown-unknown" ; CHECK-SPIRV: 3 FunctionParameter [[IMG2D_RW]] {{[0-9]+}} ; CHECK-SPIRV: 3 FunctionParameter [[SAMP]] [[SAMP_ARG:[0-9]+]] -; CHECK-LLVM: define spir_kernel void @foo( -; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %a, -; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %b, -; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c1, -; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d1, -; CHECK-LLVM: %opencl.image3d_t addrspace(1)* nocapture %e1, -; CHECK-LLVM: %opencl.image2d_array_t addrspace(1)* nocapture %f1, -; CHECK-LLVM: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1, -; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c2, -; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d3, -; CHECK-LLVM: i32 %s) +; CHECK-LLVM: define spir_kernel void @foo( +; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %a, +; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %b, +; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c1, +; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d1, +; CHECK-LLVM-SAME: %opencl.image3d_t addrspace(1)* nocapture %e1, +; CHECK-LLVM-SAME: %opencl.image2d_array_t addrspace(1)* nocapture %f1, +; CHECK-LLVM-SAME: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1, +; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c2, +; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d3, +; CHECK-LLVM-SAME: i32 %s) +; CHECK-LLVM-SAME: !kernel_arg_addr_space [[AS:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type_qual [[TQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]] ; Function Attrs: nounwind readnone define spir_kernel void @foo( @@ -122,11 +127,11 @@ attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-point !opencl.compiler.options = !{!8} !llvm.ident = !{!10} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_access_qual", !"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write", !"none"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_base_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type_qual", !"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !"", !""} +; CHECK-LLVM-DAG: [[AS]] = !{i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0} +; CHECK-LLVM-DAG: [[AQ]] = !{!"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write", !"none"} +; CHECK-LLVM-DAG: [[TYPE]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"} +; CHECK-LLVM-DAG: [[BT]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t", !"sampler_t"} +; CHECK-LLVM-DAG: [[TQ]] = !{!"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !"", !""} !0 = !{void (%opencl.pipe_t addrspace(1)*, %opencl.pipe_t addrspace(1)*, %opencl.image1d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, %opencl.image2d_array_t addrspace(1)*, %opencl.image1d_buffer_t addrspace(1)*, %opencl.image1d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, i32)* @foo, !1, !2, !3, !4, !5} !1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0} diff --git a/test/transcoding/device_execution.ll b/test/transcoding/device_execution.ll index f947db3..538a1b2 100644 --- a/test/transcoding/device_execution.ll +++ b/test/transcoding/device_execution.ll @@ -24,8 +24,9 @@ ; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s -; CHECK-NOT: @_Z14enqueue_kernel{{.*}}{{S_|S0_|S1_}} -; CHECK: @_Z14enqueue_kernel9ocl_queue{{22kernel_enqueue_flags_t|i}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE +; CHECK-NOT: @_Z14enqueue_kernel9ocl_queue{{22kernel_enqueue_flags_t|i}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE +; CHECK-NOT: @spir_block_bind +; CHECK: call i32 @__enqueue_kernel_basic_events declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*) #1 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/test/transcoding/device_execution_multiple_blocks.ll b/test/transcoding/device_execution_multiple_blocks.ll index 69f3707..ac06106 100644 --- a/test/transcoding/device_execution_multiple_blocks.ll +++ b/test/transcoding/device_execution_multiple_blocks.ll @@ -68,30 +68,40 @@ entry: ; Function Attrs: nounwind define spir_kernel void @enqueue_block_get_kernel_preferred_work_group_size_multiple(i32 addrspace(1)* %res) #0 { +; CHECK: [[CTX:%.*]] = bitcast %0* %captured to i8* +; CHECK: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, align 8 +; CHECK: store i8* {{.*}} @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke +; CHECK: [[CAPTUREDGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK]], i32 0, i32 5 +; CHECK: [[CAPTUREDCAST:%.*]] = bitcast [8 x i8]* [[CAPTUREDGEP]] to i8* +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[CAPTUREDCAST]], i8* [[CTX]], i32 8, i32 8, i1 false) +; CHECK: [[BLOCKBCST:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK]] to i8* +; CHECK: [[BLOCKADDRCST:%.*]] = addrspacecast i8* [[BLOCKBCST]] to i8 addrspace(4)* +; CHECK: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* [[BLOCKADDRCST]]) +; CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* [[BLOCKADDRCST]]) +; CHECK: call i32 @__enqueue_kernel_basic_events({{.*}} i8 addrspace(4)* [[BLOCKADDRCST]]) + +; CHECK: [[BLOCK2:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, align 8 +; CHECK: store i8* {{.*}} @kernelBlockNoCtx_block_invoke +; CHECK: [[BLOCKBCST2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK2]] to i8* +; CHECK: [[BLOCKADDRCST2:%.*]] = addrspacecast i8* [[BLOCKBCST2]] to i8 addrspace(4)* +; CHECK-NOT: call void @llvm.memcpy +; CHECK: call i32 @__enqueue_kernel_basic_events({{.*}} i8 addrspace(4)* [[BLOCKADDRCST2]]) + entry: %captured = alloca <{ i32 addrspace(1)* }>, align 8 %ndrange = alloca %struct.ndrange_t, align 8 %block.captured = getelementptr inbounds <{ i32 addrspace(1)* }>, <{ i32 addrspace(1)* }>* %captured, i64 0, i32 0 store i32 addrspace(1)* %res, i32 addrspace(1)** %block.captured, align 8 %0 = bitcast <{ i32 addrspace(1)* }>* %captured to i8* -; CHECK: [[CTX:.*]] = bitcast %0* %captured to i8* %1 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke to i8*), i32 8, i32 8, i8* %0) #2 -; CHECK: [[BLOCK0:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]]) -; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*[[BLOCK0]]) %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block* %1) #2 -; CHECK: [[BLOCK1:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]]) -; CHECK: call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*[[BLOCK1]]) %call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block* %1) #2 %div = udiv i32 %call, %call1 %call2 = call spir_func %opencl.queue_t* @get_default_queue() #2 %conv = zext i32 %div to i64 %conv3 = zext i32 %call to i64 call spir_func void @_Z10ndrange_1Dmm(%struct.ndrange_t* sret %ndrange, i64 %conv, i64 %conv3) #2 -; CHECK: [[BLOCK2:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]]) -; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK2]]) %call4 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %1) #2 -; CHECK: [[BLOCK3:.*]] = call {{.*}} @spir_block_bind({{.*}}@kernelBlockNoCtx_block_invoke{{.*}}, i32 0, i32 0, i8* null) -; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK3]]) %2 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @kernelBlockNoCtx_block_invoke to i8*), i32 0, i32 0, i8* null) #2 %call5 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %2) #2 ret void diff --git a/test/transcoding/device_execution_overloading.ll b/test/transcoding/device_execution_overloading.ll index b719bca..e5ee309 100644 --- a/test/transcoding/device_execution_overloading.ll +++ b/test/transcoding/device_execution_overloading.ll @@ -70,13 +70,13 @@ entry: ret void } -; CHECK: @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE -; CHECK: @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE -; CHECK: @_Z14enqueue_kernel9ocl_queue{{.*}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz +; CHECK: @__get_kernel_work_group_size_impl +; CHECK: @__get_kernel_preferred_work_group_multiple_impl +; CHECK: @__enqueue_kernel_events_vaargs -; CHECK: @_Z26get_kernel_work_group_sizeU13block_pointerFvvE -; CHECK: @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE -; CHECK: @_Z14enqueue_kernel9ocl_queue{{.*}}9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE +; CHECK: @__get_kernel_work_group_size_impl +; CHECK: @__get_kernel_preferred_work_group_multiple_impl +; CHECK: @__enqueue_kernel_basic_events ; Function Attrs: nounwind define spir_kernel void @host_kernel(i32 %size, float addrspace(1)* %ptr) #0 { diff --git a/test/transcoding/device_execution_simple_local_memory.ll b/test/transcoding/device_execution_simple_local_memory.ll index cc615e5..51601eb 100644 --- a/test/transcoding/device_execution_simple_local_memory.ll +++ b/test/transcoding/device_execution_simple_local_memory.ll @@ -18,8 +18,8 @@ ;; bash$ export PATH_TO_INCLUDE= $PATH_TO_GEN/lib/clang/3.6.1/include ;; bash$ $PATH_TO_GEN/bin/clang -cc1 -x cl -cl-std=CL2.0 -triple spir64-unknonw-unknown -emit-llvm -include opencl-20.h repro.cl -o device_execution.ll -;; 1. Check mangling of device execution built-ins for blocks with local memory arguments -;; 2. Check there is an enqueue_kernel with ellipsis +;; Check that device enqueue BIs wasn't mangled + ; RUN: llvm-as %s -o %t.bc ; RUN: llvm-spirv %t.bc -o %t.spv @@ -61,11 +61,11 @@ entry: %0 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i32 0, i32 0, i8* null) store %opencl.block* %0, %opencl.block** %block, align 8 %1 = load %opencl.block*, %opencl.block** %block, align 8 -; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE +; CHECK: call {{.*}} @__get_kernel_work_group_size_impl %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block* %1) store i32 %call, i32* %wgSize, align 4 %2 = load %opencl.block*, %opencl.block** %block, align 8 -; CHECK: call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE +; CHECK: call {{.*}} @__get_kernel_preferred_work_group_multiple_impl %call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block* %2) store i32 %call1, i32* %prefMul, align 4 %call2 = call spir_func %opencl.queue_t* @_Z17get_default_queuev() @@ -75,7 +75,7 @@ entry: %5 = load i32, i32* %wgSize, align 4 %6 = load i32, i32* %prefMul, align 4 %mul = mul i32 %5, %6 -; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block* {{.*}}, i32 {{.*}}, i32 {{.*}}) +; CHECK: call {{.*}} @__enqueue_kernel_events_vaargs({{.*}}, i32 {{.*}}, i32 {{.*}}) %call3 = call spir_func i32 (%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %agg.tmp, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %3, i32 %4, i32 %mul) ret void } @@ -108,7 +108,7 @@ declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block*) #1 -; CHECK: declare {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block*, i32, ...) +; CHECK: declare {{.*}} @__enqueue_kernel_events_vaargs{{.*}}({{.*}}, i32, ...) declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) #1 declare spir_func %opencl.queue_t* @_Z17get_default_queuev() #1 diff --git a/test/transcoding/device_execution_vaargs.ll b/test/transcoding/device_execution_vaargs.ll new file mode 100644 index 0000000..5c307e6 --- /dev/null +++ b/test/transcoding/device_execution_vaargs.ll @@ -0,0 +1,231 @@ +;; bash$ cat device_execution_overloading.cl +;; void device_kernel_with_local_args(__local float* ptr0, __local float* ptr1) { +;; *ptr0 = 0; +;; *ptr1 = 1; +;; } +;; +;; void device_kernel(__global float* ptr) { +;; *ptr = 3; +;; } +;; +;; __kernel void host_kernel(uint size, __global float* ptr) { +;; void(^block_with_local)(__local void*, __local void*) = ^(__local void* ptr0, __local void* ptr1){ +;; device_kernel_with_local_args(ptr0, ptr1); +;; }; +;; +;; void(^block)(void) = ^{ +;; device_kernel(ptr); +;; }; +;; +;n; uint wgSize = get_kernel_work_group_size(block_with_local); +;; uint prefMul = get_kernel_preferred_work_group_size_multiple(block_with_local); +;; enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1), +;; 0, NULL, NULL, block_with_local, size, wgSize * prefMul); +;; +;; wgSize = get_kernel_work_group_size(block); +;; prefMul = get_kernel_preferred_work_group_size_multiple(block); +;; enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1), +;; 0, NULL, NULL, block); +;; } +;; bash$ +;;$PATH_TO_GEN/bin/clang -cc1 -x cl -O0 -cl-std=CL2.0 -triple spir64-unknonw-unknown -include $PATH_TO_GEN/lib/clang/3.6.1/include/opencl-20.h -emit-llvm device_execution_overloading.cl -o device_execution_overloading.ll + +;; Test enqueue_kernel with and wthout vaargs + +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: llvm-spirv -r %t.spv -o %t.bc +; RUN: llvm-dis < %t.bc | FileCheck %s + +; CHECK: %struct.__block_descriptor = type { i64, i64 } + +; CHECK: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, align 8 +; CHECK: [[INVOKEGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], i32 0, i32 3 +; CHECK: store i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i8** [[INVOKEGEP]] +; CHECK: [[BLOCKDESCGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], i32 0, i32 4 +; CHECK: store %struct.__block_descriptor* @__block_descriptor_spirv, %struct.__block_descriptor** [[BLOCKDESCGEP]] +; CHECK: [[BLOCKBCAST:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]] to i8* +; CHECK: [[BLOCKADDRCAST:%.*]] = addrspacecast i8* [[BLOCKBCAST]] to i8 addrspace(4)* +; CHECK: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* [[BLOCKADDRCAST]]) +; CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* [[BLOCKADDRCAST]]) +; CHECK: call i32 {{.*}} @__enqueue_kernel_events_vaargs + +; CHECK: [[BLOCK2:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, align 8 +; CHECK: [[INVOKEGEP2:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]], i32 0, i32 3 +; CHECK: store i8* bitcast (void (i8*)* @__host_kernel_block_invoke_2 to i8*), i8** [[INVOKEGEP2]] +; CHECK: [[BLOCKDESCGEP2:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]], i32 0, i32 4 +; CHECK: store %struct.__block_descriptor* @__block_descriptor_spirv.1, %struct.__block_descriptor** [[BLOCKDESCGEP2]] +; CHECK: [[CAPTUREDGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]], i32 0, i32 5 +; CHECK: [[CAPTUREDCAST:%.*]] = bitcast [8 x i8]* [[CAPTUREDGEP]] to i8* +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[CAPTUREDCAST]], i8* %0, i32 8, i32 8, i1 false) +; CHECK: [[BLOCKBCAST2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[BLOCK2]] to i8* +; CHECK: [[BLOCKADDRCAST2:%.*]] = addrspacecast i8* [[BLOCKBCAST2]] to i8 addrspace(4)* +; CHECK: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* [[BLOCKADDRCAST2]]) +; CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* [[BLOCKADDRCAST2]]) #0 +; CHECK: call i32 @__enqueue_kernel_basic_events + +; CHECK: define internal spir_func void @__host_kernel_block_invoke( +; CHECK-NOT: bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [0 x i8] }>* + +; CHECK: define internal spir_func void @__host_kernel_block_invoke_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INV_BLOCK:%.*]] = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* +; CHECK-NEXT: [[INV_CAPTUREDGEP:%.*]] = getelementptr <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [8 x i8] }>* [[INV_BLOCK]], i32 0, i32 5 +; CHECK-NEXT: bitcast [8 x i8]* [[INV_CAPTUREDGEP]] to i8* +; CHECK-NEXT: br label %invoke + +; ModuleID = 'device_execution_overloading.cl' +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknonw-unknown" + +%opencl.block = type opaque +%struct.ndrange_t = type { i32, [3 x i64], [3 x i64], [3 x i64] } +%opencl.queue_t = type opaque +%opencl.clk_event_t = type opaque + +; Function Attrs: nounwind +define spir_func void @device_kernel_with_local_args(float addrspace(3)* %ptr0, float addrspace(3)* %ptr1) #0 { +entry: + %ptr0.addr = alloca float addrspace(3)*, align 8 + %ptr1.addr = alloca float addrspace(3)*, align 8 + store float addrspace(3)* %ptr0, float addrspace(3)** %ptr0.addr, align 8 + store float addrspace(3)* %ptr1, float addrspace(3)** %ptr1.addr, align 8 + %0 = load float addrspace(3)*, float addrspace(3)** %ptr0.addr, align 8 + store float 0.000000e+00, float addrspace(3)* %0, align 4 + %1 = load float addrspace(3)*, float addrspace(3)** %ptr1.addr, align 8 + store float 1.000000e+00, float addrspace(3)* %1, align 4 + ret void +} + +; Function Attrs: nounwind +define spir_func void @device_kernel(float addrspace(1)* %ptr) #0 { +entry: + %ptr.addr = alloca float addrspace(1)*, align 8 + store float addrspace(1)* %ptr, float addrspace(1)** %ptr.addr, align 8 + %0 = load float addrspace(1)*, float addrspace(1)** %ptr.addr, align 8 + store float 3.000000e+00, float addrspace(1)* %0, align 4 + ret void +} +; Function Attrs: nounwind +define spir_kernel void @host_kernel(i32 %size, float addrspace(1)* %ptr) #0 { +entry: + %size.addr = alloca i32, align 4 + %ptr.addr = alloca float addrspace(1)*, align 8 + %block_with_local = alloca %opencl.block*, align 8 + %block = alloca %opencl.block*, align 8 + %captured = alloca <{ float addrspace(1)* }>, align 8 + %wgSize = alloca i32, align 4 + %prefMul = alloca i32, align 4 + %agg.tmp = alloca %struct.ndrange_t, align 8 + %agg.tmp8 = alloca %struct.ndrange_t, align 8 + store i32 %size, i32* %size.addr, align 4 + store float addrspace(1)* %ptr, float addrspace(1)** %ptr.addr, align 8 + %0 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i32 0, i32 0, i8* null) + store %opencl.block* %0, %opencl.block** %block_with_local, align 8 + %block.captured = getelementptr inbounds <{ float addrspace(1)* }>, <{ float addrspace(1)* }>* %captured, i32 0, i32 0 + %1 = load float addrspace(1)*, float addrspace(1)** %ptr.addr, align 8 + store float addrspace(1)* %1, float addrspace(1)** %block.captured, align 8 + %2 = bitcast <{ float addrspace(1)* }>* %captured to i8* + %3 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @__host_kernel_block_invoke_2 to i8*), i32 8, i32 8, i8* %2) + store %opencl.block* %3, %opencl.block** %block, align 8 + %4 = load %opencl.block*, %opencl.block** %block_with_local, align 8 + %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block* %4) + store i32 %call, i32* %wgSize, align 4 + %5 = load %opencl.block*, %opencl.block** %block_with_local, align 8 + %call2 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block* %5) + store i32 %call2, i32* %prefMul, align 4 + %call3 = call spir_func %opencl.queue_t* @_Z17get_default_queuev() + call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp, i64 1) + %6 = load %opencl.block*, %opencl.block** %block_with_local, align 8 + %7 = load i32, i32* %size.addr, align 4 + %8 = load i32, i32* %wgSize, align 4 + %9 = load i32, i32* %prefMul, align 4 + %mul = mul i32 %8, %9 + %call4 = call spir_func i32 (%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t* %call3, i32 241, %struct.ndrange_t* byval %agg.tmp, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %6, i32 %7, i32 %mul) + %10 = load %opencl.block*, %opencl.block** %block, align 8 + %call5 = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block* %10) + store i32 %call5, i32* %wgSize, align 4 + %11 = load %opencl.block*, %opencl.block** %block, align 8 + %call6 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block* %11) + store i32 %call6, i32* %prefMul, align 4 + %call7 = call spir_func %opencl.queue_t* @_Z17get_default_queuev() + call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp8, i64 1) + %12 = load %opencl.block*, %opencl.block** %block, align 8 + %call9 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t* %call7, i32 241, %struct.ndrange_t* byval %agg.tmp8, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %12) + ret void +} + +; Function Attrs: nounwind +define internal spir_func void @__host_kernel_block_invoke(i8* %.block_descriptor, i8 addrspace(3)* %ptr0, i8 addrspace(3)* %ptr1) #0 { +entry: + %.block_descriptor.addr = alloca i8*, align 8 + %ptr0.addr = alloca i8 addrspace(3)*, align 8 + %ptr1.addr = alloca i8 addrspace(3)*, align 8 + %block.addr = alloca <{}>*, align 8 + store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8 + %0 = load i8*, i8** %.block_descriptor.addr + store i8 addrspace(3)* %ptr0, i8 addrspace(3)** %ptr0.addr, align 8 + store i8 addrspace(3)* %ptr1, i8 addrspace(3)** %ptr1.addr, align 8 + %block = bitcast i8* %.block_descriptor to <{}>* + store <{}>* %block, <{}>** %block.addr, align 8 + %1 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr0.addr, align 8 + %2 = bitcast i8 addrspace(3)* %1 to float addrspace(3)* + %3 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr1.addr, align 8 + %4 = bitcast i8 addrspace(3)* %3 to float addrspace(3)* + call spir_func void @device_kernel_with_local_args(float addrspace(3)* %2, float addrspace(3)* %4) + ret void +} + +declare %opencl.block* @spir_block_bind(i8*, i32, i32, i8*) + +; Function Attrs: nounwind +define internal spir_func void @__host_kernel_block_invoke_2(i8* %.block_descriptor) #0 { +entry: + %.block_descriptor.addr = alloca i8*, align 8 + %block.addr = alloca <{ float addrspace(1)* }>*, align 8 + store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8 + %0 = load i8*, i8** %.block_descriptor.addr + %block = bitcast i8* %.block_descriptor to <{ float addrspace(1)* }>* + store <{ float addrspace(1)* }>* %block, <{ float addrspace(1)* }>** %block.addr, align 8 + %block.capture.addr = getelementptr inbounds <{ float addrspace(1)* }>, <{ float addrspace(1)* }>* %block, i32 0, i32 0 + %1 = load float addrspace(1)*, float addrspace(1)** %block.capture.addr, align 8 + call spir_func void @device_kernel(float addrspace(1)* %1) + ret void +} + +declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block*) #1 + +declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block*) #1 + +declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...) #1 + +declare spir_func %opencl.queue_t* @_Z17get_default_queuev() #1 + +declare spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret, i64) #1 + +declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*) #1 + +declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*) #1 + +declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvvE(%opencl.queue_t*, i32, %struct.ndrange_t* byval, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!opencl.kernels = !{!0} +!opencl.enable.FP_CONTRACT = !{} +!opencl.spir.version = !{!6} +!opencl.ocl.version = !{!7} +!opencl.used.extensions = !{!8} +!opencl.used.optional.core.features = !{!8} +!opencl.compiler.options = !{!8} + +!0 = !{void (i32, float addrspace(1)*)* @host_kernel, !1, !2, !3, !4, !5} +!1 = !{!"kernel_arg_addr_space", i32 0, i32 1} +!2 = !{!"kernel_arg_access_qual", !"none", !"none"} +!3 = !{!"kernel_arg_type", !"uint", !"float*"} +!4 = !{!"kernel_arg_base_type", !"uint", !"float*"} +!5 = !{!"kernel_arg_type_qual", !"", !""} +!6 = !{i32 1, i32 2} +!7 = !{i32 2, i32 0} +!8 = !{} diff --git a/test/transcoding/spirv-types.ll b/test/transcoding/spirv-types.ll index 7539dd7..67e5bca 100644 --- a/test/transcoding/spirv-types.ll +++ b/test/transcoding/spirv-types.ll @@ -57,7 +57,7 @@ target triple = "spir-unknown-unknown" %spirv.Pipe._0 = type opaque ; read_only pipe %spirv.Pipe._1 = type opaque ; write_only pipe %spirv.Image._void_0_0_0_0_0_0_0 = type opaque ; read_only image1d_t -%spirv.Image._int_1_0_0_0_0_0_0 = type opaque ; read_only image2d_t +%spirv.Image._int_1_0_0_0_0_0_0 = type opaque ; read_only image2d_t %spirv.Image._uint_2_0_0_0_0_0_0 = type opaque ; read_only image3d_t %spirv.Image._float_1_1_0_0_0_0_0 = type opaque; read_only image2d_depth_t %spirv.Image._half_1_0_1_0_0_0_0 = type opaque ; read_only image2d_array_t @@ -82,17 +82,22 @@ target triple = "spir-unknown-unknown" ; CHECK-SPIRV: 3 FunctionParameter [[IMG1D_WR]] {{[0-9]+}} ; CHECK-SPIRV: 3 FunctionParameter [[IMG2D_RW]] {{[0-9]+}} -; CHECK-LLVM: define spir_kernel void @foo( -; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %a, -; CHECK-LLVM: %opencl.pipe_t addrspace(1)* nocapture %b, -; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c1, -; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d1, -; CHECK-LLVM: %opencl.image3d_t addrspace(1)* nocapture %e1, -; CHECK-LLVM: %opencl.image2d_array_t addrspace(1)* nocapture %f1, -; CHECK-LLVM: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1, -; CHECK-LLVM: %opencl.image1d_t addrspace(1)* nocapture %c2, -; CHECK-LLVM: %opencl.image2d_t addrspace(1)* nocapture %d3) - +; CHECK-LLVM: define spir_kernel void @foo( +; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %a, +; CHECK-LLVM-SAME: %opencl.pipe_t addrspace(1)* nocapture %b, +; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c1, +; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d1, +; CHECK-LLVM-SAME: %opencl.image3d_t addrspace(1)* nocapture %e1, +; CHECK-LLVM-SAME: %opencl.image2d_array_t addrspace(1)* nocapture %f1, +; CHECK-LLVM-SAME: %opencl.image1d_buffer_t addrspace(1)* nocapture %g1, +; CHECK-LLVM-SAME: %opencl.image1d_t addrspace(1)* nocapture %c2, +; CHECK-LLVM-SAME: %opencl.image2d_t addrspace(1)* nocapture %d3) +; CHECK-LLVM-SAME: !kernel_arg_addr_space [[AS:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_access_qual [[AQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type [[TYPE:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_type_qual [[TQ:![0-9]+]] +; CHECK-LLVM-SAME: !kernel_arg_base_type [[BT:![0-9]+]] + ; Function Attrs: nounwind readnone define spir_kernel void @foo( %spirv.Pipe._0 addrspace(1)* nocapture %a, @@ -161,11 +166,11 @@ attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-point !opencl.compiler.options = !{!8} !llvm.ident = !{!10} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_access_qual", !"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_base_type", !"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"} -; CHECK-LLVM-DAG: {{![0-9]+}} = !{!"kernel_arg_type_qual", !"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !""} +; CHECK-LLVM-DAG: [[AS]] = !{i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1} +; CHECK-LLVM-DAG: [[AQ]] = !{!"read_only", !"write_only", !"read_only", !"read_only", !"read_only", !"read_only", !"read_only", !"write_only", !"read_write"} +; CHECK-LLVM-DAG: [[TYPE]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"} +; CHECK-LLVM-DAG: [[BT]] = !{!"pipe", !"pipe", !"image1d_t", !"image2d_t", !"image3d_t", !"image2d_array_t", !"image1d_buffer_t", !"image1d_t", !"image2d_t"} +; CHECK-LLVM-DAG: [[TQ]] = !{!"pipe", !"pipe", !"", !"", !"", !"", !"", !"", !""} !0 = !{void (%spirv.Pipe._0 addrspace(1)*, %spirv.Pipe._1 addrspace(1)*, %spirv.Image._void_0_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._int_1_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._uint_2_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._half_1_0_1_0_0_0_0 addrspace(1)*, %spirv.Image._float_5_0_0_0_0_0_0 addrspace(1)*, %spirv.Image._void_0_0_0_0_0_0_1 addrspace(1)*, %spirv.Image._void_1_0_0_0_0_0_2 addrspace(1)*)* @foo, !1, !2, !3, !4, !5} !1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1} diff --git a/test/vec_type_hint.ll b/test/vec_type_hint.ll index e3bc812..6afef29 100644 --- a/test/vec_type_hint.ll +++ b/test/vec_type_hint.ll @@ -1,15 +1,15 @@ ; kernel ; __attribute__((vec_type_hint(float4))) ; void test_float() {} -; +; ; kernel ; __attribute__((vec_type_hint(double))) ; void test_double() {} -; +; ; kernel ; __attribute__((vec_type_hint(uint4))) ; void test_uint() {} -; +; ; kernel ; __attribute__((vec_type_hint(int8))) ; void test_int() {} @@ -26,33 +26,41 @@ ; CHECK-SPIRV: {{[0-9]+}} EntryPoint {{[0-9]+}} {{[0-9]+}} "test_double" ; CHECK-SPIRV: {{[0-9]+}} EntryPoint {{[0-9]+}} {{[0-9]+}} "test_uint" ; CHECK-SPIRV: {{[0-9]+}} EntryPoint {{[0-9]+}} {{[0-9]+}} "test_int" -; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} ; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} -; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} -; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} +; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} +; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} +; CHECK-SPIRV: {{[0-9]+}} ExecutionMode {{[0-9]+}} 30 {{[0-9]+}} ; ModuleID = 'vec_type_hint.cl' target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-unknown" +; CHECK-LLVM: define spir_kernel void @test_float() +; CHECK-LLVM-SAME: !vec_type_hint [[VFLOAT:![0-9]+]] ; Function Attrs: nounwind define spir_kernel void @test_float() #0 { entry: ret void } +; CHECK-LLVM: define spir_kernel void @test_double() +; CHECK-LLVM-SAME: !vec_type_hint [[VDOUBLE:![0-9]+]] ; Function Attrs: nounwind define spir_kernel void @test_double() #0 { entry: ret void } +; CHECK-LLVM: define spir_kernel void @test_uint() +; CHECK-LLVM-SAME: !vec_type_hint [[VUINT:![0-9]+]] ; Function Attrs: nounwind define spir_kernel void @test_uint() #0 { entry: ret void } +; CHECK-LLVM: define spir_kernel void @test_int() +; CHECK-LLVM-SAME: !vec_type_hint [[VINT:![0-9]+]] ; Function Attrs: nounwind define spir_kernel void @test_int() #0 { entry: @@ -70,14 +78,10 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"= !opencl.compiler.options = !{!15} !llvm.ident = !{!16} -; CHECK-LLVM: @test_float -; CHECK-LLVM: {!"vec_type_hint", <4 x float> undef, i32 1} -; CHECK-LLVM: @test_double -; CHECK-LLVM: {!"vec_type_hint", double undef, i32 1} -; CHECK-LLVM: @test_uint -; CHECK-LLVM: {!"vec_type_hint", <4 x i32> undef, i32 1} -; CHECK-LLVM: @test_int -; CHECK-LLVM: {!"vec_type_hint", <8 x i32> undef, i32 1} +; CHECK-LLVM: [[VFLOAT]] = !{<4 x float> undef, i32 1} +; CHECK-LLVM: [[VDOUBLE]] = !{double undef, i32 1} +; CHECK-LLVM: [[VUINT]] = !{<4 x i32> undef, i32 1} +; CHECK-LLVM: [[VINT]] = !{<8 x i32> undef, i32 1} !0 = !{void ()* @test_float, !1, !2, !3, !4, !5, !6} !1 = !{!"kernel_arg_addr_space"} -- cgit v1.2.3