diff options
author | Ruiling Song <ruiling.song@intel.com> | 2015-12-04 16:30:30 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-12-14 15:06:12 +0800 |
commit | cf5857f9f706800e421637b45fa3e0e39957384d (patch) | |
tree | 91dc314c036e6307ffdd80b7b60e4b1fdde2a9a1 | |
parent | 6e6c2ca2ebccfb7dd8f598860dd80599c84ccbbf (diff) |
OCL20/GBE: Fix 64bit pointer issue in Load store instruction selection.
Previously, 64-bit pointers were not handled correctly.
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Luo, Xionghu <xionghu.luo@intel.com>
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 79 |
1 files changed, 59 insertions, 20 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 63e36ef6..f3008e0d 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -3720,7 +3720,13 @@ namespace gbe if (sel.isScalarReg(addr.reg())) { sel.curr.noMask = 1; } - sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2)); + if (sel.getRegisterFamily(addr.reg())) { + // as we still use offset instead of absolut graphics address, + // it is safe to convert from u64 to u32 + GenRegister t = convertU64ToU32(sel, addr); + sel.SHR(addrDW, t, GenRegister::immud(2)); + } else + sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2)); sel.pop(); sel.DWORD_GATHER(dst, addrDW, BTI_CONSTANT); @@ -3797,6 +3803,7 @@ namespace gbe dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64); bool isUniform = sel.isScalarReg(insn.getValue(0)); + unsigned addrBytes = typeSize(addr.type); AddressMode AM = insn.getAddressMode(); vector<GenRegister> btiTemp = sel.getBTITemps(AM); sel.push(); @@ -3814,7 +3821,10 @@ namespace gbe read64Legacy(sel, addr, dst, b, btiTemp); } else if (addrSpace == MEM_LOCAL || addrSpace == MEM_CONSTANT) { GenRegister b = GenRegister::immud(addrSpace == MEM_LOCAL? 
0xfe : BTI_CONSTANT); - read64Legacy(sel, addr, dst, b, btiTemp); + GenRegister addrDW = addr; + if (addrBytes == 8) + addrDW = convertU64ToU32(sel, addr); + read64Legacy(sel, addrDW, dst, b, btiTemp); } else { read64Stateless(sel, addr, dst); } @@ -3830,9 +3840,12 @@ namespace gbe ir::AddressSpace addrSpace) const { using namespace ir; - Register tmpReg = sel.reg(FAMILY_DWORD); - GenRegister tmpAddr = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32); + RegisterFamily addrFamily = sel.getRegisterFamily(address.reg()); + Type addrType = getType(addrFamily); + Register tmpReg = sel.reg(FAMILY_DWORD, isUniform); + GenRegister tmpAddr = sel.selReg(sel.reg(addrFamily, isUniform), addrType); GenRegister tmpData = sel.selReg(tmpReg, ir::TYPE_U32); + GenRegister addrOffset = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32); // Get dword aligned addr sel.push(); @@ -3840,7 +3853,11 @@ namespace gbe sel.curr.noMask = 1; sel.curr.execWidth = 1; } - sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc)); + if (addrFamily == FAMILY_DWORD) + sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc)); + else + sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UL), GenRegister::immuint64(0xfffffffffffffffc)); + sel.pop(); sel.push(); vector<GenRegister> tmp; @@ -3852,9 +3869,13 @@ namespace gbe if (isUniform) sel.curr.execWidth = 1; // Get the remaining offset from aligned addr - sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3)); - sel.SHL(tmpAddr, tmpAddr, GenRegister::immud(0x3)); - sel.SHR(tmpData, tmpData, tmpAddr); + if (addrFamily == FAMILY_QWORD) { + sel.AND(addrOffset, sel.unpacked_ud(address.reg()), GenRegister::immud(0x3)); + } else { + sel.AND(addrOffset, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3)); + } + sel.SHL(addrOffset, addrOffset, GenRegister::immud(0x3)); + sel.SHR(tmpData, tmpData, addrOffset); if (elemSize == 
GEN_BYTE_SCATTER_WORD) sel.MOV(GenRegister::retype(dst, GEN_TYPE_UW), GenRegister::unpacked_uw(tmpReg, isUniform, sel.isLongReg(tmpReg))); @@ -3908,6 +3929,7 @@ namespace gbe using namespace ir; GBE_ASSERT(effectData.size() == effectDataNum); GBE_ASSERT(tmp.size() == effectDataNum + 1); + RegisterFamily addrFamily = sel.getRegisterFamily(address.reg()); sel.push(); Register alignedFlag = sel.reg(FAMILY_BOOL, isUniform); GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32); @@ -3916,7 +3938,12 @@ namespace gbe sel.push(); if (isUniform) sel.curr.noMask = 1; - sel.AND(shiftL, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(0x3)); + if (addrFamily == FAMILY_QWORD) { + GenRegister t = convertU64ToU32(sel, address); + sel.AND(shiftL, t, GenRegister::immud(0x3)); + } else { + sel.AND(shiftL, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3)); + } sel.SHL(shiftL, shiftL, GenRegister::immud(0x3)); sel.ADD(shiftH, GenRegister::negate(shiftL), GenRegister::immud(32)); sel.curr.physicalFlag = 0; @@ -4026,6 +4053,8 @@ namespace gbe 1 : sel.ctx.getSimdWidth(); const bool isUniform = simdWidth == 1; RegisterFamily family = getFamily(insn.getValueType()); + RegisterFamily addrFamily = sel.getRegisterFamily(address.reg()); + Type addrType = getType(addrFamily); if(valueNum > 1) { GBE_ASSERT(!isUniform && "vector load should not be uniform. 
Something went wrong."); @@ -4041,11 +4070,14 @@ namespace gbe for(uint32_t i = 0; i < effectDataNum + 1; i++) tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32); - GenRegister alignedAddr = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32); + GenRegister alignedAddr = sel.selReg(sel.reg(addrFamily, isUniform), addrType); sel.push(); if (isUniform) sel.curr.noMask = 1; - sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(~0x3)); + if (addrFamily == FAMILY_DWORD) + sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(~0x3)); + else + sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UL), GenRegister::immuint64(~0x3ul)); sel.pop(); uint32_t remainedReg = effectDataNum + 1; @@ -4057,7 +4089,10 @@ namespace gbe sel.push(); if (isUniform) sel.curr.noMask = 1; - sel.ADD(alignedAddr, alignedAddr, GenRegister::immud(pos * 4)); + if (addrFamily == FAMILY_DWORD) + sel.ADD(alignedAddr, alignedAddr, GenRegister::immud(pos * 4)); + else + sel.ADD(alignedAddr, alignedAddr, GenRegister::immuint64(pos * 4)); sel.pop(); } shootUntypedReadMsg(sel, insn, t1, alignedAddr, width, addrSpace); @@ -4110,7 +4145,8 @@ namespace gbe { using namespace ir; const ir::LoadInstruction &insn = cast<ir::LoadInstruction>(dag.insn); - GenRegister address = sel.selReg(insn.getAddressRegister(), ir::TYPE_U32); + Register reg = insn.getAddressRegister(); + GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg))); GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL || insn.getAddressSpace() == MEM_CONSTANT || insn.getAddressSpace() == MEM_PRIVATE || @@ -4198,7 +4234,6 @@ namespace gbe for (unsigned k = 0; k < (valueNum+1)/2+1; k++) { msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32)); } - bool valueScalar = sel.isScalarReg(value[0].reg()); sel.push(); /* do first quarter */ sel.curr.execWidth = 8; @@ -4232,8 +4267,6 @@ namespace gbe AddressMode AM = insn.getAddressMode(); 
vector<GenRegister> btiTemp = sel.getBTITemps(AM); - bool addrScalar = sel.isScalarReg(address.reg()); - if (AM == AM_DynamicBti || AM == AM_StaticBti) { if (AM == AM_DynamicBti) { Register btiReg = insn.getBtiReg(); @@ -4347,6 +4380,7 @@ namespace gbe src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64); AddressMode AM = insn.getAddressMode(); + unsigned int addrBytes = typeSize(address.type); vector<GenRegister> btiTemp = sel.getBTITemps(AM); if (AM != AM_Stateless) { GenRegister b; @@ -4356,9 +4390,13 @@ namespace gbe b = GenRegister::immud(insn.getSurfaceIndex()); } write64Legacy(sel, address, src, b, btiTemp); - } else if (addrSpace == MEM_CONSTANT || addrSpace == MEM_LOCAL) { - GenRegister b = GenRegister::immud(addrSpace == MEM_CONSTANT ? BTI_CONSTANT : 0xfe); - write64Legacy(sel, address, src, b, btiTemp); + } else if (addrSpace == MEM_LOCAL) { + GenRegister b = GenRegister::immud(0xfe); + GenRegister addr = address; + if (addrBytes == 8) { + addr = convertU64ToU32(sel, address); + } + write64Legacy(sel, addr, src, b, btiTemp); } else { GBE_ASSERT(sel.hasLongType()); write64Stateless(sel, address, src); @@ -4480,7 +4518,8 @@ namespace gbe { using namespace ir; const ir::StoreInstruction &insn = cast<ir::StoreInstruction>(dag.insn); - GenRegister address = sel.selReg(insn.getAddressRegister(), ir::TYPE_U32); + Register reg = insn.getAddressRegister(); + GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg))); AddressSpace addrSpace = insn.getAddressSpace(); const Type type = insn.getValueType(); const uint32_t elemSize = getByteScatterGatherSize(sel, type); |