summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRuiling Song <ruiling.song@intel.com>2015-12-04 16:30:30 +0800
committerYang Rong <rong.r.yang@intel.com>2015-12-14 15:06:12 +0800
commitcf5857f9f706800e421637b45fa3e0e39957384d (patch)
tree91dc314c036e6307ffdd80b7b60e4b1fdde2a9a1
parent6e6c2ca2ebccfb7dd8f598860dd80599c84ccbbf (diff)
OCL20/GBE: Fix 64bit pointer issue in Load store instruction selection.
Previously we did not handle 64-bit pointers correctly. Signed-off-by: Ruiling Song <ruiling.song@intel.com> Reviewed-by: Luo, Xionghu <xionghu.luo@intel.com>
-rw-r--r--backend/src/backend/gen_insn_selection.cpp79
1 files changed, 59 insertions, 20 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 63e36ef6..f3008e0d 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3720,7 +3720,13 @@ namespace gbe
if (sel.isScalarReg(addr.reg())) {
sel.curr.noMask = 1;
}
- sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
+ if (sel.getRegisterFamily(addr.reg())) {
+ // as we still use offset instead of absolute graphics address,
+ // it is safe to convert from u64 to u32
+ GenRegister t = convertU64ToU32(sel, addr);
+ sel.SHR(addrDW, t, GenRegister::immud(2));
+ } else
+ sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
sel.pop();
sel.DWORD_GATHER(dst, addrDW, BTI_CONSTANT);
@@ -3797,6 +3803,7 @@ namespace gbe
dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
bool isUniform = sel.isScalarReg(insn.getValue(0));
+ unsigned addrBytes = typeSize(addr.type);
AddressMode AM = insn.getAddressMode();
vector<GenRegister> btiTemp = sel.getBTITemps(AM);
sel.push();
@@ -3814,7 +3821,10 @@ namespace gbe
read64Legacy(sel, addr, dst, b, btiTemp);
} else if (addrSpace == MEM_LOCAL || addrSpace == MEM_CONSTANT) {
GenRegister b = GenRegister::immud(addrSpace == MEM_LOCAL? 0xfe : BTI_CONSTANT);
- read64Legacy(sel, addr, dst, b, btiTemp);
+ GenRegister addrDW = addr;
+ if (addrBytes == 8)
+ addrDW = convertU64ToU32(sel, addr);
+ read64Legacy(sel, addrDW, dst, b, btiTemp);
} else {
read64Stateless(sel, addr, dst);
}
@@ -3830,9 +3840,12 @@ namespace gbe
ir::AddressSpace addrSpace) const
{
using namespace ir;
- Register tmpReg = sel.reg(FAMILY_DWORD);
- GenRegister tmpAddr = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
+ RegisterFamily addrFamily = sel.getRegisterFamily(address.reg());
+ Type addrType = getType(addrFamily);
+ Register tmpReg = sel.reg(FAMILY_DWORD, isUniform);
+ GenRegister tmpAddr = sel.selReg(sel.reg(addrFamily, isUniform), addrType);
GenRegister tmpData = sel.selReg(tmpReg, ir::TYPE_U32);
+ GenRegister addrOffset = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
// Get dword aligned addr
sel.push();
@@ -3840,7 +3853,11 @@ namespace gbe
sel.curr.noMask = 1;
sel.curr.execWidth = 1;
}
- sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
+ if (addrFamily == FAMILY_DWORD)
+ sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
+ else
+ sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UL), GenRegister::immuint64(0xfffffffffffffffc));
+
sel.pop();
sel.push();
vector<GenRegister> tmp;
@@ -3852,9 +3869,13 @@ namespace gbe
if (isUniform)
sel.curr.execWidth = 1;
// Get the remaining offset from aligned addr
- sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
- sel.SHL(tmpAddr, tmpAddr, GenRegister::immud(0x3));
- sel.SHR(tmpData, tmpData, tmpAddr);
+ if (addrFamily == FAMILY_QWORD) {
+ sel.AND(addrOffset, sel.unpacked_ud(address.reg()), GenRegister::immud(0x3));
+ } else {
+ sel.AND(addrOffset, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
+ }
+ sel.SHL(addrOffset, addrOffset, GenRegister::immud(0x3));
+ sel.SHR(tmpData, tmpData, addrOffset);
if (elemSize == GEN_BYTE_SCATTER_WORD)
sel.MOV(GenRegister::retype(dst, GEN_TYPE_UW), GenRegister::unpacked_uw(tmpReg, isUniform, sel.isLongReg(tmpReg)));
@@ -3908,6 +3929,7 @@ namespace gbe
using namespace ir;
GBE_ASSERT(effectData.size() == effectDataNum);
GBE_ASSERT(tmp.size() == effectDataNum + 1);
+ RegisterFamily addrFamily = sel.getRegisterFamily(address.reg());
sel.push();
Register alignedFlag = sel.reg(FAMILY_BOOL, isUniform);
GenRegister shiftL = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
@@ -3916,7 +3938,12 @@ namespace gbe
sel.push();
if (isUniform)
sel.curr.noMask = 1;
- sel.AND(shiftL, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(0x3));
+ if (addrFamily == FAMILY_QWORD) {
+ GenRegister t = convertU64ToU32(sel, address);
+ sel.AND(shiftL, t, GenRegister::immud(0x3));
+ } else {
+ sel.AND(shiftL, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
+ }
sel.SHL(shiftL, shiftL, GenRegister::immud(0x3));
sel.ADD(shiftH, GenRegister::negate(shiftL), GenRegister::immud(32));
sel.curr.physicalFlag = 0;
@@ -4026,6 +4053,8 @@ namespace gbe
1 : sel.ctx.getSimdWidth();
const bool isUniform = simdWidth == 1;
RegisterFamily family = getFamily(insn.getValueType());
+ RegisterFamily addrFamily = sel.getRegisterFamily(address.reg());
+ Type addrType = getType(addrFamily);
if(valueNum > 1) {
GBE_ASSERT(!isUniform && "vector load should not be uniform. Something went wrong.");
@@ -4041,11 +4070,14 @@ namespace gbe
for(uint32_t i = 0; i < effectDataNum + 1; i++)
tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
- GenRegister alignedAddr = sel.selReg(sel.reg(FAMILY_DWORD, isUniform), ir::TYPE_U32);
+ GenRegister alignedAddr = sel.selReg(sel.reg(addrFamily, isUniform), addrType);
sel.push();
if (isUniform)
sel.curr.noMask = 1;
- sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(~0x3));
+ if (addrFamily == FAMILY_DWORD)
+ sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UD), GenRegister::immud(~0x3));
+ else
+ sel.AND(alignedAddr, GenRegister::retype(address, GEN_TYPE_UL), GenRegister::immuint64(~0x3ul));
sel.pop();
uint32_t remainedReg = effectDataNum + 1;
@@ -4057,7 +4089,10 @@ namespace gbe
sel.push();
if (isUniform)
sel.curr.noMask = 1;
- sel.ADD(alignedAddr, alignedAddr, GenRegister::immud(pos * 4));
+ if (addrFamily == FAMILY_DWORD)
+ sel.ADD(alignedAddr, alignedAddr, GenRegister::immud(pos * 4));
+ else
+ sel.ADD(alignedAddr, alignedAddr, GenRegister::immuint64(pos * 4));
sel.pop();
}
shootUntypedReadMsg(sel, insn, t1, alignedAddr, width, addrSpace);
@@ -4110,7 +4145,8 @@ namespace gbe
{
using namespace ir;
const ir::LoadInstruction &insn = cast<ir::LoadInstruction>(dag.insn);
- GenRegister address = sel.selReg(insn.getAddressRegister(), ir::TYPE_U32);
+ Register reg = insn.getAddressRegister();
+ GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg)));
GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
insn.getAddressSpace() == MEM_CONSTANT ||
insn.getAddressSpace() == MEM_PRIVATE ||
@@ -4198,7 +4234,6 @@ namespace gbe
for (unsigned k = 0; k < (valueNum+1)/2+1; k++) {
msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32));
}
- bool valueScalar = sel.isScalarReg(value[0].reg());
sel.push();
/* do first quarter */
sel.curr.execWidth = 8;
@@ -4232,8 +4267,6 @@ namespace gbe
AddressMode AM = insn.getAddressMode();
vector<GenRegister> btiTemp = sel.getBTITemps(AM);
- bool addrScalar = sel.isScalarReg(address.reg());
-
if (AM == AM_DynamicBti || AM == AM_StaticBti) {
if (AM == AM_DynamicBti) {
Register btiReg = insn.getBtiReg();
@@ -4347,6 +4380,7 @@ namespace gbe
src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
AddressMode AM = insn.getAddressMode();
+ unsigned int addrBytes = typeSize(address.type);
vector<GenRegister> btiTemp = sel.getBTITemps(AM);
if (AM != AM_Stateless) {
GenRegister b;
@@ -4356,9 +4390,13 @@ namespace gbe
b = GenRegister::immud(insn.getSurfaceIndex());
}
write64Legacy(sel, address, src, b, btiTemp);
- } else if (addrSpace == MEM_CONSTANT || addrSpace == MEM_LOCAL) {
- GenRegister b = GenRegister::immud(addrSpace == MEM_CONSTANT ? BTI_CONSTANT : 0xfe);
- write64Legacy(sel, address, src, b, btiTemp);
+ } else if (addrSpace == MEM_LOCAL) {
+ GenRegister b = GenRegister::immud(0xfe);
+ GenRegister addr = address;
+ if (addrBytes == 8) {
+ addr = convertU64ToU32(sel, address);
+ }
+ write64Legacy(sel, addr, src, b, btiTemp);
} else {
GBE_ASSERT(sel.hasLongType());
write64Stateless(sel, address, src);
@@ -4480,7 +4518,8 @@ namespace gbe
{
using namespace ir;
const ir::StoreInstruction &insn = cast<ir::StoreInstruction>(dag.insn);
- GenRegister address = sel.selReg(insn.getAddressRegister(), ir::TYPE_U32);
+ Register reg = insn.getAddressRegister();
+ GenRegister address = sel.selReg(reg, getType(sel.getRegisterFamily(reg)));
AddressSpace addrSpace = insn.getAddressSpace();
const Type type = insn.getValueType();
const uint32_t elemSize = getByteScatterGatherSize(sel, type);