diff options
author | Alexey Sotkin <alexey.sotkin@intel.com> | 2018-01-09 17:10:30 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-09 17:10:30 +0300 |
commit | d526f83cce796953e06f35c9be7f769bf44b1065 (patch) | |
tree | 5f0767b03254468820f859c72ab5b44717602edb | |
parent | 4dd2f1c54e0f9fede0a99f09f5e5728ec8e80aac (diff) | |
parent | 252037c9100c2a72444907819418bc663422be8f (diff) |
Merge pull request #226 from AlexeySotkin/SPV_INTEL_subgroups
LLVM SPIR-V changes for SPV_INTEL_subgroups
-rw-r--r-- | lib/SPIRV/OCL20ToSPIRV.cpp | 79 | ||||
-rw-r--r-- | lib/SPIRV/OCLUtil.h | 7 | ||||
-rw-r--r-- | lib/SPIRV/SPIRVReader.cpp | 31 | ||||
-rw-r--r-- | lib/SPIRV/libSPIRV/SPIRVInstruction.h | 62 | ||||
-rw-r--r-- | lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h | 8 | ||||
-rw-r--r-- | lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 3 | ||||
-rw-r--r-- | lib/SPIRV/libSPIRV/SPIRVOpCode.h | 4 | ||||
-rw-r--r-- | lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h | 8 | ||||
-rw-r--r-- | lib/SPIRV/libSPIRV/SPIRVType.h | 2 | ||||
-rw-r--r-- | lib/SPIRV/libSPIRV/spirv.hpp | 94 | ||||
-rw-r--r-- | test/transcoding/cl_intel_sub_groups.ll | 126 |
11 files changed, 412 insertions, 12 deletions
diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp index 7d95e64..58278f6 100644 --- a/lib/SPIRV/OCL20ToSPIRV.cpp +++ b/lib/SPIRV/OCL20ToSPIRV.cpp @@ -260,6 +260,14 @@ public: const std::string &DemangledName,
unsigned int Offset);
+ /// For cl_intel_subgroups block read built-ins:
+ void visitSubgroupBlockReadINTEL(CallInst *CI, StringRef MangledName,
+ const std::string &DemangledName);
+
+ /// For cl_intel_subgroups block write built-ins:
+ void visitSubgroupBlockWriteINTEL(CallInst *CI, StringRef MangledName,
+ const std::string &DemangledName);
+
void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){
I.dropAllReferences();
I.eraseFromParent();
@@ -511,6 +519,14 @@ OCL20ToSPIRV::visitCallInst(CallInst& CI) { OCLImageChannelOrderOffset);
return;
}
+ if (DemangledName.find(kOCLBuiltinName::SubgroupBlockReadINTELPrefix) == 0) {
+ visitSubgroupBlockReadINTEL(&CI, MangledName, DemangledName);
+ return;
+ }
+ if (DemangledName.find(kOCLBuiltinName::SubgroupBlockWriteINTELPrefix) == 0) {
+ visitSubgroupBlockWriteINTEL(&CI, MangledName, DemangledName);
+ return;
+ }
visitCallBuiltinSimple(&CI, MangledName, DemangledName);
}
@@ -1453,6 +1469,69 @@ void OCL20ToSPIRV::visitCallGetImageChannel(CallInst *CI, StringRef MangledName, },
&Attrs);
}
+
+// Lowers a call to one of the overloaded intel_sub_group_block_read built-ins.
+// The image and buffer overloads map to different SPIR-V instructions, and
+// the generated SPIR-V function name gets a postfix that encodes the vector
+// width of the result (if any) followed by a tag for its element size.
+void OCL20ToSPIRV::visitSubgroupBlockReadINTEL(CallInst *CI, StringRef MangledName,
+                                               const std::string &DemangledName) {
+  // An image-typed first operand selects the image instruction; anything
+  // else is handled as a buffer read.
+  const bool ReadsImage = isOCLImageType(CI->getArgOperand(0)->getType());
+  OCLBuiltinTransInfo Info;
+  Info.UniqName = getSPIRVFuncName(ReadsImage ? spv::OpSubgroupImageBlockReadINTEL
+                                              : spv::OpSubgroupBlockReadINTEL);
+
+  auto *ResultTy = CI->getType();
+  if (ResultTy->isVectorTy()) {
+    const unsigned NumElts = ResultTy->getVectorNumElements();
+    if (NumElts == 2)
+      Info.Postfix = "_v2";
+    else if (NumElts == 4)
+      Info.Postfix = "_v4";
+    else if (NumElts == 8)
+      Info.Postfix = "_v8";
+    // Any other width gets no vector postfix.
+  }
+
+  // 16-bit elements are tagged "_us"; every other element size is tagged
+  // "_ui".
+  Info.Postfix += (ResultTy->getScalarSizeInBits() == 16) ? "_us" : "_ui";
+
+  // Carry the callee's attributes over to the rewritten call.
+  AttributeSet Attrs = CI->getCalledFunction()->getAttributes();
+  mutateCallInstSPIRV(M, CI,
+                      [=](CallInst *, std::vector<Value *> &Args) {
+                        Info.PostProc(Args);
+                        return Info.UniqName + Info.Postfix;
+                      },
+                      &Attrs);
+}
+
+// Lowers a call to one of the overloaded intel_sub_group_block_write
+// built-ins. As with block reads, the image and buffer overloads map to
+// different SPIR-V instructions. Only a vector-width postfix is appended
+// here: the type of the data being written is already encoded in the mangled
+// name, so no element-size tag is needed to tell the variants apart.
+void OCL20ToSPIRV::visitSubgroupBlockWriteINTEL(CallInst *CI, StringRef MangledName,
+                                                const std::string &DemangledName) {
+  // An image-typed first operand selects the image instruction; anything
+  // else is handled as a buffer write.
+  const bool WritesImage = isOCLImageType(CI->getArgOperand(0)->getType());
+  OCLBuiltinTransInfo Info;
+  Info.UniqName = getSPIRVFuncName(WritesImage ? spv::OpSubgroupImageBlockWriteINTEL
+                                               : spv::OpSubgroupBlockWriteINTEL);
+
+  // The vector width is taken from the type of the last call operand.
+  const unsigned ArgCount = CI->getNumArgOperands();
+  if (ArgCount != 0) {
+    auto *LastArgTy = CI->getArgOperand(ArgCount - 1)->getType();
+    if (LastArgTy->isVectorTy()) {
+      const unsigned NumElts = LastArgTy->getVectorNumElements();
+      if (NumElts == 2)
+        Info.Postfix = "_v2";
+      else if (NumElts == 4)
+        Info.Postfix = "_v4";
+      else if (NumElts == 8)
+        Info.Postfix = "_v8";
+      // Any other width gets no vector postfix.
+    }
+  }
+
+  // Carry the callee's attributes over to the rewritten call.
+  AttributeSet Attrs = CI->getCalledFunction()->getAttributes();
+  mutateCallInstSPIRV(M, CI,
+                      [=](CallInst *, std::vector<Value *> &Args) {
+                        Info.PostProc(Args);
+                        return Info.UniqName + Info.Postfix;
+                      },
+                      &Attrs);
+}
+
}
INITIALIZE_PASS_BEGIN(OCL20ToSPIRV, "cl20tospv", "Transform OCL 2.0 to SPIR-V",
diff --git a/lib/SPIRV/OCLUtil.h b/lib/SPIRV/OCLUtil.h index 77ff468..8a4e261 100644 --- a/lib/SPIRV/OCLUtil.h +++ b/lib/SPIRV/OCLUtil.h @@ -203,6 +203,8 @@ namespace kOCLBuiltinName { const static char SubGroupAll[] = "sub_group_all"; const static char SubGroupAny[] = "sub_group_any"; const static char WorkPrefix[] = "work_"; + const static char SubgroupBlockReadINTELPrefix[] = "intel_sub_group_block_read"; + const static char SubgroupBlockWriteINTELPrefix[] = "intel_sub_group_block_write"; } /// Offset for OpenCL image channel order enumeration values. @@ -608,6 +610,11 @@ _SPIRV_OP(get_image_channel_data_type, ImageQueryFormat) _SPIRV_OP(get_image_channel_order, ImageQueryOrder) _SPIRV_OP(get_image_num_mip_levels, ImageQueryLevels) _SPIRV_OP(get_image_num_samples, ImageQuerySamples) +// Intel Subgroups builtins +_SPIRV_OP(intel_sub_group_shuffle, SubgroupShuffleINTEL) +_SPIRV_OP(intel_sub_group_shuffle_down, SubgroupShuffleDownINTEL) +_SPIRV_OP(intel_sub_group_shuffle_up, SubgroupShuffleUpINTEL) +_SPIRV_OP(intel_sub_group_shuffle_xor, SubgroupShuffleXorINTEL) #undef _SPIRV_OP } diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp index 8bd80a7..2e5c49f 100644 --- a/lib/SPIRV/SPIRVReader.cpp +++ b/lib/SPIRV/SPIRVReader.cpp @@ -1968,7 +1968,8 @@ SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, auto OC = BV->getOpCode();
if (isSPIRVCmpInstTransToLLVMInst(static_cast<SPIRVInstruction*>(BV))) {
return mapValue(BV, transCmpInst(BV, BB, F));
- } else if (OCLSPIRVBuiltinMap::rfind(OC, nullptr) &&
+ } else if ((OCLSPIRVBuiltinMap::rfind(OC, nullptr) ||
+ isIntelSubgroupOpCode(OC)) &&
!isAtomicOpCode(OC) &&
!isGroupOpCode(OC) &&
!isPipeOpCode(OC)) {
@@ -2210,6 +2211,34 @@ SPIRVToLLVM::getOCLBuiltinName(SPIRVInstruction* BI) { (EleTy->isTypeArray() && Dim >= 2 && Dim <= 3));
return std::string(kOCLBuiltinName::NDRangePrefix) + OS.str() + "D";
}
+  // Intel subgroup instructions. Block reads and writes have their OpenCL
+  // name rebuilt from the data type; every other opcode in the Intel subgroup
+  // range is looked up in the built-in map.
+  if (isIntelSubgroupOpCode(OC)) {
+    std::stringstream Name;
+    SPIRVType *DataTy = nullptr;
+    switch (OC) {
+    case OpSubgroupBlockReadINTEL:
+    case OpSubgroupImageBlockReadINTEL:
+      // For reads the data type is the result type of the instruction.
+      Name << "intel_sub_group_block_read";
+      DataTy = BI->getType();
+      break;
+    case OpSubgroupBlockWriteINTEL:
+      // Buffer write: the data type is taken from operand 1.
+      Name << "intel_sub_group_block_write";
+      DataTy = BI->getOperands()[1]->getType();
+      break;
+    case OpSubgroupImageBlockWriteINTEL:
+      // Image write: the data type is taken from operand 2.
+      Name << "intel_sub_group_block_write";
+      DataTy = BI->getOperands()[2]->getType();
+      break;
+    default:
+      return OCLSPIRVBuiltinMap::rmap(OC);
+    }
+    if (DataTy) {
+      // The "_us" suffix depends on the element width, so it must also be
+      // emitted for scalar 16-bit data, not only for vectors; otherwise a
+      // scalar 16-bit block read/write would be renamed to the unsuffixed
+      // built-in.
+      const bool IsVector = DataTy->isTypeVector();
+      SPIRVType *ElemTy = IsVector ? DataTy->getVectorComponentType() : DataTy;
+      if (ElemTy->getBitWidth() == 16)
+        Name << "_us";
+      // Vector variants carry the component count as a trailing number.
+      if (IsVector) {
+        if (unsigned ComponentCount = DataTy->getVectorComponentCount())
+          Name << ComponentCount;
+      }
+    }
+    return Name.str();
+  }
auto Name = OCLSPIRVBuiltinMap::rmap(OC);
SPIRVType *T = nullptr;
diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h index a3e0169..7fb0758 100644 --- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -1867,7 +1867,7 @@ public: setHasNoId();
setHasNoType();
}
- SPIRVCapVec getRequiredCapability() const {
+ SPIRVCapVec getRequiredCapability() const override {
return getVec(CapabilityKernel);
}
SPIRVValue *getObject() { return getValue(Object); };
@@ -1952,7 +1952,7 @@ enum SPIRVOpKind { class SPIRVDevEnqInstBase:public SPIRVInstTemplateBase {
public:
- SPIRVCapVec getRequiriedCapability() const {
+ SPIRVCapVec getRequiredCapability() const override {
return getVec(CapabilityDeviceEnqueue);
}
};
@@ -1979,7 +1979,7 @@ _SPIRV_OP(BuildNDRange, true, 6) class SPIRVPipeInstBase:public SPIRVInstTemplateBase {
public:
- SPIRVCapVec getRequiriedCapability() const {
+ SPIRVCapVec getRequiredCapability() const override {
return getVec(CapabilityPipes);
}
};
@@ -2003,7 +2003,7 @@ _SPIRV_OP(GetMaxPipePackets, true, 6) class SPIRVPipeStorageInstBase :public SPIRVInstTemplateBase {
public:
- SPIRVCapVec getRequiriedCapability() const {
+ SPIRVCapVec getRequiredCapability() const override {
return getVec(CapabilityPipeStorage, CapabilityPipes);
}
};
@@ -2017,7 +2017,7 @@ _SPIRV_OP(CreatePipeFromPipeStorage, true, 4) class SPIRVGroupInstBase:public SPIRVInstTemplateBase {
public:
- SPIRVCapVec getRequiriedCapability() const {
+ SPIRVCapVec getRequiredCapability() const override {
return getVec(CapabilityGroups);
}
};
@@ -2046,7 +2046,7 @@ _SPIRV_OP(GroupCommitWritePipe, false, 6) class SPIRVAtomicInstBase:public SPIRVInstTemplateBase {
public:
- SPIRVCapVec getRequiriedCapability() const {
+ SPIRVCapVec getRequiredCapability() const override {
return getVec(CapabilityInt64Atomics);
}
};
@@ -2078,7 +2078,7 @@ _SPIRV_OP(MemoryBarrier, false, 3) class SPIRVImageInstBase:public SPIRVInstTemplateBase {
public:
- SPIRVCapVec getRequiriedCapability() const {
+ SPIRVCapVec getRequiredCapability() const override {
return getVec(CapabilityImageBasic);
}
};
@@ -2110,6 +2110,54 @@ _SPIRV_OP(GenericPtrMemSemantics, true, 4, false) _SPIRV_OP(GenericCastToPtrExplicit, true, 5, false, 1)
#undef _SPIRV_OP
+/// Base class used as the template argument for the Intel subgroup shuffle
+/// instruction typedefs; reports the capability those instructions require.
+class SPIRVSubgroupShuffleINTELInstBase:public SPIRVInstTemplateBase {
+public:
+  // Public, matching the other *InstBase helper classes in this header.
+  SPIRVCapVec getRequiredCapability() const override {
+    return getVec(CapabilitySubgroupShuffleINTEL);
+  }
+};
+
+#define _SPIRV_OP(x, ...) \
+ typedef SPIRVInstTemplate<SPIRVSubgroupShuffleINTELInstBase, Op##x, __VA_ARGS__> \
+ SPIRV##x;
+// Intel Subgroup Shuffle Instructions
+_SPIRV_OP(SubgroupShuffleINTEL, true, 5)
+_SPIRV_OP(SubgroupShuffleDownINTEL, true, 6)
+_SPIRV_OP(SubgroupShuffleUpINTEL, true, 6)
+_SPIRV_OP(SubgroupShuffleXorINTEL, true, 5)
+#undef _SPIRV_OP
+
+/// Base class used as the template argument for the Intel subgroup buffer
+/// block read/write instruction typedefs; reports the capability those
+/// instructions require.
+class SPIRVSubgroupBufferBlockIOINTELInstBase:public SPIRVInstTemplateBase {
+public:
+  // Public, matching the other *InstBase helper classes in this header.
+  SPIRVCapVec getRequiredCapability() const override {
+    return getVec(CapabilitySubgroupBufferBlockIOINTEL);
+  }
+};
+
+#define _SPIRV_OP(x, ...) \
+ typedef SPIRVInstTemplate<SPIRVSubgroupBufferBlockIOINTELInstBase, Op##x, __VA_ARGS__> \
+ SPIRV##x;
+// Intel Subgroup Buffer Block Read and Write Instructions
+_SPIRV_OP(SubgroupBlockReadINTEL, true, 4)
+_SPIRV_OP(SubgroupBlockWriteINTEL, false, 3)
+#undef _SPIRV_OP
+
+/// Base class used as the template argument for the Intel subgroup image
+/// block read/write instruction typedefs; reports the capability those
+/// instructions require.
+class SPIRVSubgroupImageBlockIOINTELInstBase:public SPIRVInstTemplateBase {
+public:
+  // Public, matching the other *InstBase helper classes in this header.
+  SPIRVCapVec getRequiredCapability() const override {
+    return getVec(CapabilitySubgroupImageBlockIOINTEL);
+  }
+};
+
+#define _SPIRV_OP(x, ...) \
+ typedef SPIRVInstTemplate<SPIRVSubgroupImageBlockIOINTELInstBase, Op##x, __VA_ARGS__> \
+ SPIRV##x;
+// Intel Subgroup Image Block Read and Write Instructions
+_SPIRV_OP(SubgroupImageBlockReadINTEL, true, 5)
+_SPIRV_OP(SubgroupImageBlockWriteINTEL, false, 4)
+#undef _SPIRV_OP
+
+
SPIRVSpecConstantOp *createSpecConstantOpInst(SPIRVInstruction *Inst);
SPIRVInstruction *createInstFromSpecConstantOp(SPIRVSpecConstantOp *C);
}
diff --git a/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h b/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h index c380946..193c236 100644 --- a/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h @@ -884,6 +884,14 @@ isValid(spv::Op V) { case OpMemoryNamedBarrier: case OpModuleProcessed: case OpForward: + case OpSubgroupShuffleINTEL: + case OpSubgroupShuffleDownINTEL: + case OpSubgroupShuffleUpINTEL: + case OpSubgroupShuffleXorINTEL: + case OpSubgroupBlockReadINTEL: + case OpSubgroupBlockWriteINTEL: + case OpSubgroupImageBlockReadINTEL: + case OpSubgroupImageBlockWriteINTEL: return true; default: return false; diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index 4128071..fc66cb7 100644 --- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -478,6 +478,9 @@ SPIRVMap<Capability, std::string>::init() { add(CapabilityStorageImageReadWithoutFormat, "StorageImageReadWithoutFormat"); add(CapabilityStorageImageWriteWithoutFormat, "StorageImageWriteWithoutFormat"); add(CapabilityMultiViewport, "MultiViewport"); + add(CapabilitySubgroupShuffleINTEL, "SubgroupShuffleINTEL"); + add(CapabilitySubgroupBufferBlockIOINTEL, "SubgroupBufferBlockIOINTEL"); + add(CapabilitySubgroupImageBlockIOINTEL, "SubgroupImageBlockIOINTEL"); } SPIRV_DEF_NAMEMAP(Capability, SPIRVCapabilityNameMap) diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h index 8cb8ce7..f502fa2 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h +++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h @@ -169,6 +169,10 @@ inline bool isModuleScopeAllowedOpCode(Op OpCode) { isConstantOpCode(OpCode);
}
+/// Returns true when \p OpCode lies in the inclusive opcode range that runs
+/// from OpSubgroupShuffleINTEL through OpSubgroupImageBlockWriteINTEL.
+inline bool isIntelSubgroupOpCode(Op OpCode) {
+  const unsigned Value = static_cast<unsigned>(OpCode);
+  const unsigned First = OpSubgroupShuffleINTEL;
+  const unsigned Last = OpSubgroupImageBlockWriteINTEL;
+  return !(Value < First || Last < Value);
+}
}
#endif /* SPIRVOPCODE_HPP_ */
diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h index 04b2d7c..bd81aba 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h @@ -295,3 +295,11 @@ _SPIRV_OP(TypePipeStorage, 322) _SPIRV_OP(ConstantPipeStorage, 323)
_SPIRV_OP(CreatePipeFromPipeStorage, 324)
_SPIRV_OP(Forward, 1024)
+_SPIRV_OP(SubgroupShuffleINTEL, 5571) +_SPIRV_OP(SubgroupShuffleDownINTEL, 5572) +_SPIRV_OP(SubgroupShuffleUpINTEL, 5573) +_SPIRV_OP(SubgroupShuffleXorINTEL, 5574) +_SPIRV_OP(SubgroupBlockReadINTEL, 5575) +_SPIRV_OP(SubgroupBlockWriteINTEL, 5576) +_SPIRV_OP(SubgroupImageBlockReadINTEL, 5577) +_SPIRV_OP(SubgroupImageBlockWriteINTEL, 5578) diff --git a/lib/SPIRV/libSPIRV/SPIRVType.h b/lib/SPIRV/libSPIRV/SPIRVType.h index 3b668e1..fe80d5d 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/lib/SPIRV/libSPIRV/SPIRVType.h @@ -281,7 +281,7 @@ public: bool isValidIndex(SPIRVWord Index) const { return Index < CompCount;}
SPIRVCapVec getRequiredCapability() const {
SPIRVCapVec V(getComponentType()->getRequiredCapability());
+ // Despite its name, the Vector16 capability is what SPIR-V requires for
+ // both 8-component and 16-component vectors, so 8 must stay included here.
if (CompCount >= 8)
V.push_back(CapabilityVector16);
return std::move(V);
}
diff --git a/lib/SPIRV/libSPIRV/spirv.hpp b/lib/SPIRV/libSPIRV/spirv.hpp index b072d9f..00ddd51 100644 --- a/lib/SPIRV/libSPIRV/spirv.hpp +++ b/lib/SPIRV/libSPIRV/spirv.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2014-2016 The Khronos Group Inc. +// Copyright (c) 2014-2017 The Khronos Group Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and/or associated documentation files (the "Materials"), @@ -47,11 +47,11 @@ namespace spv { typedef unsigned int Id; #define SPV_VERSION 0x10100 -#define SPV_REVISION 1 +#define SPV_REVISION 7 static const unsigned int MagicNumber = 0x07230203; static const unsigned int Version = 0x00010100; -static const unsigned int Revision = 1; +static const unsigned int Revision = 7; static const unsigned int OpCodeMask = 0xffff; static const unsigned int WordCountShift = 16; @@ -61,6 +61,8 @@ enum SourceLanguage { SourceLanguageGLSL = 2, SourceLanguageOpenCL_C = 3, SourceLanguageOpenCL_CPP = 4, + SourceLanguageHLSL = 5, + SourceLanguageMax = 0x7fffffff, }; enum ExecutionModel { @@ -71,18 +73,21 @@ enum ExecutionModel { ExecutionModelFragment = 4, ExecutionModelGLCompute = 5, ExecutionModelKernel = 6, + ExecutionModelMax = 0x7fffffff, }; enum AddressingModel { AddressingModelLogical = 0, AddressingModelPhysical32 = 1, AddressingModelPhysical64 = 2, + AddressingModelMax = 0x7fffffff, }; enum MemoryModel { MemoryModelSimple = 0, MemoryModelGLSL450 = 1, MemoryModelOpenCL = 2, + MemoryModelMax = 0x7fffffff, }; enum ExecutionMode { @@ -121,6 +126,7 @@ enum ExecutionMode { ExecutionModeFinalizer = 34, ExecutionModeSubgroupSize = 35, ExecutionModeSubgroupsPerWorkgroup = 36, + ExecutionModeMax = 0x7fffffff, }; enum StorageClass { @@ -136,6 +142,8 @@ enum StorageClass { StorageClassPushConstant = 9, StorageClassAtomicCounter = 10, StorageClassImage = 11, + StorageClassStorageBuffer = 12, + StorageClassMax = 0x7fffffff, }; enum Dim { @@ -146,6 +154,7 @@ enum Dim { DimRect = 4, DimBuffer = 5, 
DimSubpassData = 6, + DimMax = 0x7fffffff, }; enum SamplerAddressingMode { @@ -154,11 +163,13 @@ enum SamplerAddressingMode { SamplerAddressingModeClamp = 2, SamplerAddressingModeRepeat = 3, SamplerAddressingModeRepeatMirrored = 4, + SamplerAddressingModeMax = 0x7fffffff, }; enum SamplerFilterMode { SamplerFilterModeNearest = 0, SamplerFilterModeLinear = 1, + SamplerFilterModeMax = 0x7fffffff, }; enum ImageFormat { @@ -202,6 +213,7 @@ enum ImageFormat { ImageFormatRg8ui = 37, ImageFormatR16ui = 38, ImageFormatR8ui = 39, + ImageFormatMax = 0x7fffffff, }; enum ImageChannelOrder { @@ -225,6 +237,7 @@ enum ImageChannelOrder { ImageChannelOrdersRGBA = 17, ImageChannelOrdersBGRA = 18, ImageChannelOrderABGR = 19, + ImageChannelOrderMax = 0x7fffffff, }; enum ImageChannelDataType { @@ -245,6 +258,7 @@ enum ImageChannelDataType { ImageChannelDataTypeFloat = 14, ImageChannelDataTypeUnormInt24 = 15, ImageChannelDataTypeUnormInt101010_2 = 16, + ImageChannelDataTypeMax = 0x7fffffff, }; enum ImageOperandsShift { @@ -256,6 +270,7 @@ enum ImageOperandsShift { ImageOperandsConstOffsetsShift = 5, ImageOperandsSampleShift = 6, ImageOperandsMinLodShift = 7, + ImageOperandsMax = 0x7fffffff, }; enum ImageOperandsMask { @@ -276,6 +291,7 @@ enum FPFastMathModeShift { FPFastMathModeNSZShift = 2, FPFastMathModeAllowRecipShift = 3, FPFastMathModeFastShift = 4, + FPFastMathModeMax = 0x7fffffff, }; enum FPFastMathModeMask { @@ -292,18 +308,21 @@ enum FPRoundingMode { FPRoundingModeRTZ = 1, FPRoundingModeRTP = 2, FPRoundingModeRTN = 3, + FPRoundingModeMax = 0x7fffffff, }; enum LinkageType { LinkageTypeExport = 0, LinkageTypeImport = 1, LinkageTypeInternal, /* internal use only */ + LinkageTypeMax = 0x7fffffff, }; enum AccessQualifier { AccessQualifierReadOnly = 0, AccessQualifierWriteOnly = 1, AccessQualifierReadWrite = 2, + AccessQualifierMax = 0x7fffffff, }; enum FunctionParameterAttribute { @@ -315,6 +334,7 @@ enum FunctionParameterAttribute { FunctionParameterAttributeNoCapture = 5, 
FunctionParameterAttributeNoWrite = 6, FunctionParameterAttributeNoReadWrite = 7, + FunctionParameterAttributeMax = 0x7fffffff, }; enum Decoration { @@ -362,6 +382,11 @@ enum Decoration { DecorationInputAttachmentIndex = 43, DecorationAlignment = 44, DecorationMaxByteOffset = 45, + DecorationOverrideCoverageNV = 5248, + DecorationPassthroughNV = 5250, + DecorationViewportRelativeNV = 5252, + DecorationSecondaryViewportRelativeNV = 5256, + DecorationMax = 0x7fffffff, }; enum BuiltIn { @@ -406,11 +431,28 @@ enum BuiltIn { BuiltInSubgroupLocalInvocationId = 41, BuiltInVertexIndex = 42, BuiltInInstanceIndex = 43, + BuiltInSubgroupEqMaskKHR = 4416, + BuiltInSubgroupGeMaskKHR = 4417, + BuiltInSubgroupGtMaskKHR = 4418, + BuiltInSubgroupLeMaskKHR = 4419, + BuiltInSubgroupLtMaskKHR = 4420, + BuiltInBaseVertex = 4424, + BuiltInBaseInstance = 4425, + BuiltInDrawIndex = 4426, + BuiltInDeviceIndex = 4438, + BuiltInViewIndex = 4440, + BuiltInViewportMaskNV = 5253, + BuiltInSecondaryPositionNV = 5257, + BuiltInSecondaryViewportMaskNV = 5258, + BuiltInPositionPerViewNV = 5261, + BuiltInViewportMaskPerViewNV = 5262, + BuiltInMax = 0x7fffffff, }; enum SelectionControlShift { SelectionControlFlattenShift = 0, SelectionControlDontFlattenShift = 1, + SelectionControlMax = 0x7fffffff, }; enum SelectionControlMask { @@ -424,6 +466,7 @@ enum LoopControlShift { LoopControlDontUnrollShift = 1, LoopControlDependencyInfiniteShift = 2, LoopControlDependencyLengthShift = 3, + LoopControlMax = 0x7fffffff, }; enum LoopControlMask { @@ -439,6 +482,7 @@ enum FunctionControlShift { FunctionControlDontInlineShift = 1, FunctionControlPureShift = 2, FunctionControlConstShift = 3, + FunctionControlMax = 0x7fffffff, }; enum FunctionControlMask { @@ -460,6 +504,7 @@ enum MemorySemanticsShift { MemorySemanticsCrossWorkgroupMemoryShift = 9, MemorySemanticsAtomicCounterMemoryShift = 10, MemorySemanticsImageMemoryShift = 11, + MemorySemanticsMax = 0x7fffffff, }; enum MemorySemanticsMask { @@ -480,6 +525,7 @@ 
enum MemoryAccessShift { MemoryAccessVolatileShift = 0, MemoryAccessAlignedShift = 1, MemoryAccessNontemporalShift = 2, + MemoryAccessMax = 0x7fffffff, }; enum MemoryAccessMask { @@ -495,22 +541,26 @@ enum Scope { ScopeWorkgroup = 2, ScopeSubgroup = 3, ScopeInvocation = 4, + ScopeMax = 0x7fffffff, }; enum GroupOperation { GroupOperationReduce = 0, GroupOperationInclusiveScan = 1, GroupOperationExclusiveScan = 2, + GroupOperationMax = 0x7fffffff, }; enum KernelEnqueueFlags { KernelEnqueueFlagsNoWait = 0, KernelEnqueueFlagsWaitKernel = 1, KernelEnqueueFlagsWaitWorkGroup = 2, + KernelEnqueueFlagsMax = 0x7fffffff, }; enum KernelProfilingInfoShift { KernelProfilingInfoCmdExecTimeShift = 0, + KernelProfilingInfoMax = 0x7fffffff, }; enum KernelProfilingInfoMask { @@ -578,6 +628,29 @@ enum Capability { CapabilitySubgroupDispatch = 58, CapabilityNamedBarrier = 59, CapabilityPipeStorage = 60, + CapabilitySubgroupBallotKHR = 4423, + CapabilityDrawParameters = 4427, + CapabilitySubgroupVoteKHR = 4431, + CapabilityStorageBuffer16BitAccess = 4433, + CapabilityStorageUniformBufferBlock16 = 4433, + CapabilityStorageUniform16 = 4434, + CapabilityUniformAndStorageBuffer16BitAccess = 4434, + CapabilityStoragePushConstant16 = 4435, + CapabilityStorageInputOutput16 = 4436, + CapabilityDeviceGroup = 4437, + CapabilityMultiView = 4439, + CapabilityVariablePointersStorageBuffer = 4441, + CapabilityVariablePointers = 4442, + CapabilitySampleMaskOverrideCoverageNV = 5249, + CapabilityGeometryShaderPassthroughNV = 5251, + CapabilityShaderViewportIndexLayerNV = 5254, + CapabilityShaderViewportMaskNV = 5255, + CapabilityShaderStereoViewNV = 5259, + CapabilityPerViewAttributesNV = 5260, + CapabilitySubgroupShuffleINTEL = 5568, + CapabilitySubgroupBufferBlockIOINTEL = 5569, + CapabilitySubgroupImageBlockIOINTEL = 5570, + CapabilityMax = 0x7fffffff, }; enum Op { @@ -886,6 +959,21 @@ enum Op { OpMemoryNamedBarrier = 329, OpModuleProcessed = 330, OpForward = 1024, /* internal use only */ + 
OpSubgroupBallotKHR = 4421, + OpSubgroupFirstInvocationKHR = 4422, + OpSubgroupAllKHR = 4428, + OpSubgroupAnyKHR = 4429, + OpSubgroupAllEqualKHR = 4430, + OpSubgroupReadInvocationKHR = 4432, + OpSubgroupShuffleINTEL = 5571, + OpSubgroupShuffleDownINTEL = 5572, + OpSubgroupShuffleUpINTEL = 5573, + OpSubgroupShuffleXorINTEL = 5574, + OpSubgroupBlockReadINTEL = 5575, + OpSubgroupBlockWriteINTEL = 5576, + OpSubgroupImageBlockReadINTEL = 5577, + OpSubgroupImageBlockWriteINTEL = 5578, + OpMax = 0x7fffffff, }; // Overload operator| for mask bit combining diff --git a/test/transcoding/cl_intel_sub_groups.ll b/test/transcoding/cl_intel_sub_groups.ll new file mode 100644 index 0000000..edf4945 --- /dev/null +++ b/test/transcoding/cl_intel_sub_groups.ll @@ -0,0 +1,126 @@ +;Source: +;void __kernel test(float2 x, uint c, +; read_only image2d_t image_in, +; write_only image2d_t image_out, +; int2 coord, +; __global uint* p, +; __global ushort* sp) { +; intel_sub_group_shuffle(x, c); +; intel_sub_group_shuffle_down(x, x, c); +; intel_sub_group_shuffle_up(x, x, c); +; intel_sub_group_shuffle_xor(x, c); +; +; uint2 ui2 = intel_sub_group_block_read2(image_in, coord); +; intel_sub_group_block_write2(p, ui2); +; intel_sub_group_block_write2(image_out, coord, ui2); +; +; ushort2 us2 = intel_sub_group_block_read_us2(sp); +; intel_sub_group_block_write_us2(sp, us2); +; intel_sub_group_block_write_us2(image_out, coord, us2); +;} + +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o - -spirv-text | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: llvm-spirv -r -spirv-gen-image-type-acc-postfix %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV: Capability SubgroupShuffleINTEL +; CHECK-SPIRV: Capability SubgroupBufferBlockIOINTEL +; CHECK-SPIRV: Capability SubgroupImageBlockIOINTEL +; CHECK-SPIRV: Extension "cl_intel_subgroups" +; CHECK-SPIRV: Extension "cl_intel_subgroups_short" + +; CHECK-SPIRV: 
SubgroupShuffleINTEL +; CHECK-SPIRV: SubgroupShuffleDownINTEL +; CHECK-SPIRV: SubgroupShuffleUpINTEL +; CHECK-SPIRV: SubgroupShuffleXorINTEL + +; CHECK-SPIRV: SubgroupImageBlockReadINTEL +; CHECK-SPIRV: SubgroupBlockWriteINTEL +; CHECK-SPIRV: SubgroupImageBlockWriteINTEL + +; CHECK-SPIRV: SubgroupBlockReadINTEL +; CHECK-SPIRV: SubgroupBlockWriteINTEL +; CHECK-SPIRV: SubgroupImageBlockWriteINTEL + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64" + +%opencl.image2d_ro_t = type opaque +%opencl.image2d_wo_t = type opaque + +; Function Attrs: nounwind +define spir_kernel void @test(<2 x float> %x, i32 %c, %opencl.image2d_ro_t addrspace(1)* %image_in, %opencl.image2d_wo_t addrspace(1)* %image_out, <2 x i32> %coord, i32 addrspace(1)* %p, i16 addrspace(1)* %sp) #0 { +entry: + %call = tail call spir_func <2 x float> @_Z23intel_sub_group_shuffleDv2_fj(<2 x float> %x, i32 %c) #3 + %call1 = tail call spir_func <2 x float> @_Z28intel_sub_group_shuffle_downDv2_fDv2_fj(<2 x float> %x, <2 x float> %x, i32 %c) #3 + %call2 = tail call spir_func <2 x float> @_Z26intel_sub_group_shuffle_upDv2_fDv2_fj(<2 x float> %x, <2 x float> %x, i32 %c) #3 + %call3 = tail call spir_func <2 x float> @_Z27intel_sub_group_shuffle_xorDv2_fj(<2 x float> %x, i32 %c) #3 +; CHECK-LLVM: call spir_func <2 x float> @_Z23intel_sub_group_shuffle{{.*}}(<2 x float> %x, i32 %c) +; CHECK-LLVM: call spir_func <2 x float> @_Z28intel_sub_group_shuffle_down{{.*}}(<2 x float> %x, <2 x float> %x, i32 %c) +; CHECK-LLVM: call spir_func <2 x float> @_Z26intel_sub_group_shuffle_up{{.*}}(<2 x float> %x, <2 x float> %x, i32 %c) +; CHECK-LLVM: call spir_func <2 x float> @_Z27intel_sub_group_shuffle_xor{{.*}}(<2 x float> %x, i32 %c) + + %call4 = tail call spir_func <2 x i32> @_Z27intel_sub_group_block_read214ocl_image2d_roDv2_i(%opencl.image2d_ro_t addrspace(1)* %image_in, <2 x i32> %coord) #4 + tail call spir_func void 
@_Z28intel_sub_group_block_write2PU3AS1jDv2_j(i32 addrspace(1)* %p, <2 x i32> %call4) #3 + tail call spir_func void @_Z28intel_sub_group_block_write214ocl_image2d_woDv2_iDv2_j(%opencl.image2d_wo_t addrspace(1)* %image_out, <2 x i32> %coord, <2 x i32> %call4) #3 +; CHECK-LLVM: call spir_func <2 x i32> @_Z27intel_sub_group_block_read2{{.*}}(%opencl.image2d_ro_t addrspace(1)* %image_in, <2 x i32> %coord) +; CHECK-LLVM: call spir_func void @_Z28intel_sub_group_block_write2{{.*}}(i32 addrspace(1)* %p, <2 x i32> %call4) +; CHECK-LLVM: call spir_func void @_Z28intel_sub_group_block_write2{{.*}}(%opencl.image2d_wo_t addrspace(1)* %image_out, <2 x i32> %coord, <2 x i32> %call4) + + %call5 = tail call spir_func <2 x i16> @_Z30intel_sub_group_block_read_us2PKU3AS1t(i16 addrspace(1)* %sp) #4 + tail call spir_func void @_Z31intel_sub_group_block_write_us2PU3AS1tDv2_t(i16 addrspace(1)* %sp, <2 x i16> %call5) #3 + tail call spir_func void @_Z31intel_sub_group_block_write_us214ocl_image2d_woDv2_iDv2_t(%opencl.image2d_wo_t addrspace(1)* %image_out, <2 x i32> %coord, <2 x i16> %call5) #3 +; CHECK-LLVM: call spir_func <2 x i16> @_Z30intel_sub_group_block_read_us2{{.*}}(i16 addrspace(1)* %sp) +; CHECK-LLVM: call spir_func void @_Z31intel_sub_group_block_write_us2{{.*}}(i16 addrspace(1)* %sp, <2 x i16> %call5) +; CHECK-LLVM: call spir_func void @_Z31intel_sub_group_block_write_us2{{.*}}(%opencl.image2d_wo_t addrspace(1)* %image_out, <2 x i32> %coord, <2 x i16> %call5) + ret void +} + +declare spir_func <2 x float> @_Z23intel_sub_group_shuffleDv2_fj(<2 x float>, i32) #1 + +declare spir_func <2 x float> @_Z28intel_sub_group_shuffle_downDv2_fDv2_fj(<2 x float>, <2 x float>, i32) #1 + +declare spir_func <2 x float> @_Z26intel_sub_group_shuffle_upDv2_fDv2_fj(<2 x float>, <2 x float>, i32) #1 + +declare spir_func <2 x float> @_Z27intel_sub_group_shuffle_xorDv2_fj(<2 x float>, i32) #1 + +; Function Attrs: nounwind readonly +declare spir_func <2 x i32> 
@_Z27intel_sub_group_block_read214ocl_image2d_roDv2_i(%opencl.image2d_ro_t addrspace(1)*, <2 x i32>) #2 + +declare spir_func void @_Z28intel_sub_group_block_write2PU3AS1jDv2_j(i32 addrspace(1)*, <2 x i32>) #1 + +declare spir_func void @_Z28intel_sub_group_block_write214ocl_image2d_woDv2_iDv2_j(%opencl.image2d_wo_t addrspace(1)*, <2 x i32>, <2 x i32>) #1 + +; Function Attrs: nounwind readonly +declare spir_func <2 x i16> @_Z30intel_sub_group_block_read_us2PKU3AS1t(i16 addrspace(1)*) #2 + +declare spir_func void @_Z31intel_sub_group_block_write_us2PU3AS1tDv2_t(i16 addrspace(1)*, <2 x i16>) #1 + +declare spir_func void @_Z31intel_sub_group_block_write_us214ocl_image2d_woDv2_iDv2_t(%opencl.image2d_wo_t addrspace(1)*, <2 x i32>, <2 x i16>) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { nounwind readonly } + +!opencl.kernels = !{!0} +!opencl.enable.FP_CONTRACT = !{} +!opencl.spir.version = !{!6} +!opencl.ocl.version = !{!7} +!opencl.used.extensions = !{!8} +!opencl.used.optional.core.features = !{!9} +!opencl.compiler.options = !{!9} + +!0 = !{void (<2 x float>, i32, %opencl.image2d_ro_t addrspace(1)*, %opencl.image2d_wo_t addrspace(1)*, <2 x i32>, i32 addrspace(1)*, i16 addrspace(1)*)* @test, !1, !2, !3, !4, !5} +!1 = 
!{!"kernel_arg_addr_space", i32 0, i32 0, i32 1, i32 1, i32 0, i32 1, i32 1} +!2 = !{!"kernel_arg_access_qual", !"none", !"none", !"read_only", !"write_only", !"none", !"none", !"none"} +!3 = !{!"kernel_arg_type", !"float2", !"uint", !"__read_only image2d_t", !"__write_only image2d_t", !"int2", !"uint*", !"ushort*"} +!4 = !{!"kernel_arg_base_type", !"float2", !"uint", !"__read_only image2d_t", !"__write_only image2d_t", !"int2", !"uint*", !"ushort*"} +!5 = !{!"kernel_arg_type_qual", !"", !"", !"", !"", !"", !"", !""} +!6 = !{i32 1, i32 2} +!7 = !{i32 2, i32 0} +!8 = !{!"cl_intel_subgroups", !"cl_intel_subgroups_short"} +!9 = !{} |