diff options
author | Robert Khasanov <rob.khasanov@gmail.com> | 2014-07-21 14:54:21 +0000 |
---|---|---|
committer | Robert Khasanov <rob.khasanov@gmail.com> | 2014-07-21 14:54:21 +0000 |
commit | aac33cfc08baaa8e19994fc58a302804328114ca (patch) | |
tree | 1f9ec7dff5fb549670365862d2a518967c185121 /lib/Target | |
parent | 9f276b7021de37eb4e161067d3c68a2b99a9fae4 (diff) |
[SKX] Enabling SKX target and AVX512BW, AVX512DQ, AVX512VL features.
Enabling the HasDQI, HasBWI and HasVLX predicates (AVX512DQ, AVX512BW, AVX512VL).
Adding VK2, VK4, VK32, VK64 mask register classes (plus the matching VK*WM classes).
Adding new types (v64i8, v32i16) to VR512.
Extending calling conventions for the new types (v64i8, v32i16).
Patch by Zinovy Nis <zinovy.y.nis@intel.com>
Reviewed by Elena Demikhovsky <elena.demikhovsky@intel.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213545 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86.td | 21 | ||||
-rw-r--r-- | lib/Target/X86/X86CallingConv.td | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 32 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFormats.td | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.td | 20 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 18 |
10 files changed, 91 insertions, 23 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index a259c963203..a11a238fc97 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1666,6 +1666,8 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, // Recognize only reasonable suffixes. const char *BroadcastPrimitive = StringSwitch<const char*>(getLexer().getTok().getIdentifier()) + .Case("to2", "{1to2}") + .Case("to4", "{1to4}") .Case("to8", "{1to8}") .Case("to16", "{1to16}") .Default(nullptr); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 4baaf1e6873..13a7b557b44 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -265,7 +265,7 @@ enum attributeBits { ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \ ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \ ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \ - ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") #define ENUM_ENTRY(n, r, d) n, enum InstructionContext { @@ -453,8 +453,12 @@ enum OperandEncoding { ENUM_ENTRY(TYPE_XMM256, "32-byte") \ ENUM_ENTRY(TYPE_XMM512, "64-byte") \ ENUM_ENTRY(TYPE_VK1, "1-bit") \ + ENUM_ENTRY(TYPE_VK2, "2-bit") \ + ENUM_ENTRY(TYPE_VK4, "4-bit") \ ENUM_ENTRY(TYPE_VK8, "8-bit") \ ENUM_ENTRY(TYPE_VK16, "16-bit") \ + ENUM_ENTRY(TYPE_VK32, "32-bit") \ + ENUM_ENTRY(TYPE_VK64, "64-bit") \ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 93f516a151a..cd32a0f2423 100644 --- a/lib/Target/X86/X86.td +++ 
b/lib/Target/X86/X86.td @@ -104,7 +104,15 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; - +def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", + "Enable AVX-512 Doubleword and Quadword Instructions", + [FeatureAVX512]>; +def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", + "Enable AVX-512 Byte and Word Instructions", + [FeatureAVX512]>; +def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", + "Enable AVX-512 Vector Length eXtensions", + [FeatureAVX512]>; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; @@ -276,6 +284,17 @@ def : ProcessorModel<"knl", HaswellModel, FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, FeatureSlowIncDec]>; +// SKX +// FIXME: define SKX model +def : ProcessorModel<"skx", HaswellModel, + [FeatureAVX512, FeatureCDI, + FeatureDQI, FeatureBWI, FeatureVLX, + FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT, + FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, + FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, + FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, + FeatureSlowIncDec]>; + def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; def : Proc<"k6-3", [Feature3DNow]>; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 0824d4ed660..86c01bd6464 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -52,7 +52,7 @@ def RetCC_X86Common : CallingConv<[ // 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3 // can only be used by ABI non-compliant code. This vector type is only // supported while using the AVX-512 target feature. 
- CCIfType<[v16i32, v8i64, v16f32, v8f64], + CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64], CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>, // MMX vector types are always returned in MM0. If the target doesn't have @@ -252,7 +252,7 @@ def CC_X86_64_C : CallingConv<[ YMM4, YMM5, YMM6, YMM7]>>>>, // The first 8 512-bit vector arguments are passed in ZMM registers. - CCIfNotVarArg<CCIfType<[v16i32, v8i64, v16f32, v8f64], + CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64], CCIfSubtarget<"hasAVX512()", CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 41e900ed11a..d2894088b80 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1,19 +1,36 @@ // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion let Predicates = [HasAVX512] in { - def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>; def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>; def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>; def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>; - def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 
VR512:$src)>; def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>; + def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>; + def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>; def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>; + def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>; + def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>; + def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>; + def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>; def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>; @@ -135,7 +152,6 @@ def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst), (ins VR512:$src1, 
i128mem:$src2, i8imm:$src3), "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; - } let hasSideEffects = 0 in { diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index ab6d2885fb3..8ef5f901c18 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -184,6 +184,8 @@ class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; } class EVEX_B { bit hasEVEX_B = 1; } class EVEX_RC { bit hasEVEX_RC = 1; } class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; } +class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; } +class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; } // Specify AVX512 8-bit compressed displacement encoding based on the vector // element size in bits (8, 16, 32, 64) and the CDisp8 form. diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index e7b532c6af8..0f872a676c2 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -719,10 +719,14 @@ def HasAVX512 : Predicate<"Subtarget->hasAVX512()">, AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">; def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">; def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; -def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; +def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; def HasCDI : Predicate<"Subtarget->hasCDI()">; def HasPFI : Predicate<"Subtarget->hasPFI()">; def HasERI : Predicate<"Subtarget->hasERI()">; +def HasDQI : Predicate<"Subtarget->hasDQI()">; +def HasBWI : Predicate<"Subtarget->hasBWI()">; +def HasVLX : Predicate<"Subtarget->hasVLX()">, + AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 33c402b69a4..0da98637496 100644 --- 
a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -449,7 +449,7 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> { } // AVX-512 vector/mask registers. -def VR512 : RegisterClass<"X86", [v16f32, v8f64, v16i32, v8i64], 512, +def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512, (sequence "ZMM%u", 0, 31)>; // Scalar AVX-512 floating point registers. @@ -463,13 +463,19 @@ def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], 256, (sequence "YMM%u", 0, 31)>; -// The size of the all masked registers is 16 bit because we have only one -// KMOVW istruction that can store this register in memory, and it writes 2 bytes -def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)>; -def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK1)> {let Size = 16;} +// Mask registers +def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;} +def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;} +def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;} +def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;} def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;} +def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;} +def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;} def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;} +def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;} +def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;} def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;} -def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>; - +def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;} +def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;} 
+def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;} diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index d1b71c48a92..41551a1d677 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -272,6 +272,9 @@ void X86Subtarget::initializeEnvironment() { HasERI = false; HasCDI = false; HasPFI = false; + HasDQI = false; + HasBWI = false; + HasVLX = false; HasADX = false; HasSHA = false; HasPRFCHW = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 30c3b08a890..5f5df5e0818 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -189,13 +189,22 @@ protected: /// Processor has AVX-512 PreFetch Instructions bool HasPFI; - + /// Processor has AVX-512 Exponential and Reciprocal Instructions bool HasERI; - + /// Processor has AVX-512 Conflict Detection Instructions bool HasCDI; - + + /// Processor has AVX-512 Doubleword and Quadword instructions + bool HasDQI; + + /// Processor has AVX-512 Byte and Word instructions + bool HasBWI; + + /// Processor has AVX-512 Vector Length eXtensions + bool HasVLX; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -349,6 +358,9 @@ public: bool hasCDI() const { return HasCDI; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } + bool hasDQI() const { return HasDQI; } + bool hasBWI() const { return HasBWI; } + bool hasVLX() const { return HasVLX; } bool isAtom() const { return X86ProcFamily == IntelAtom; } bool isSLM() const { return X86ProcFamily == IntelSLM; } |