diff options
author | Junyan He <junyan.he@linux.intel.com> | 2015-04-14 16:16:45 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2015-04-14 15:51:58 +0800 |
commit | 761755b3109fa2f5a59408fc8947eedc5c61586f (patch) | |
tree | 7d3f61746d32268c59dae7fba8f2dcf951f15248 | |
parent | 054a9e4e55a7d41a3381d83ed61de074027ec4f4 (diff) |
Kill the A0 cache in GenContext.
The a0 value cache in GenContext can only hold the value
known at compile time, which may differ from the true
offset value at run time when the code generates a
backward jump. So just kill the a0 cache; we will
use a load-vector instruction to optimize it later.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com>
-rw-r--r-- | backend/src/backend/gen8_context.cpp | 54 | ||||
-rw-r--r-- | backend/src/backend/gen_context.cpp | 51 | ||||
-rw-r--r-- | backend/src/backend/gen_context.hpp | 1 |
3 files changed, 24 insertions, 82 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 920eb3eb..2cdb2482 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -98,8 +98,7 @@ namespace gbe p->curr.execWidth = 4; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); GenRegister dst_ = dst; dst_.type = GEN_TYPE_UB; dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -159,8 +158,7 @@ namespace gbe p->curr.execWidth = 16; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); ind_src.addr_imm += 16; p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 16), ind_src); @@ -218,8 +216,7 @@ namespace gbe p->curr.execWidth = 16; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); if (simd == 16) { ind_src.addr_imm += 16; @@ -862,46 +859,21 @@ namespace gbe } void Gen8Context::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int sz) { - int16_t diff = new_a0[0] - this->a0[0]; if (sz == 0) sz = 16; GBE_ASSERT(sz%4 == 0); GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096); - bool need_reset = false; - for (int i = 1; i < sz; i++) { - GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096); - int16_t d = new_a0[i] - this->a0[i]; - if 
(diff != d) { - need_reset = true; - break; - } - } - GBE_ASSERT(this->a0[0] + diff < 4096 && this->a0[0] + diff >= 0); - if (!need_reset && diff >= -512 && diff + max_offset <= 511) { - return; - } else if (!need_reset && sz == 16) { - p->push(); - p->curr.execWidth = 16; - p->curr.predicate = GEN_PREDICATE_NONE; - p->curr.noMask = 1; - p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), - GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), GenRegister::immw(diff)); - p->pop(); - } else { - p->push(); - p->curr.execWidth = 1; - p->curr.predicate = GEN_PREDICATE_NONE; - p->curr.noMask = 1; - for (int i = 0; i < sz/4; i++) { - uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]); - addr = addr << 32; - addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]); - p->MOV(GenRegister::retype(GenRegister::addr1(i*4), GEN_TYPE_UL), GenRegister::immuint64(addr)); - } - p->pop(); + p->push(); + p->curr.execWidth = 1; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + for (int i = 0; i < sz/4; i++) { + uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]); + addr = addr << 32; + addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]); + p->MOV(GenRegister::retype(GenRegister::addr1(i*4), GEN_TYPE_UL), GenRegister::immuint64(addr)); } - memcpy(this->a0, new_a0, sizeof(uint16_t)*sz); + p->pop(); } - } diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 094e6b49..684ecaf8 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -51,7 +51,6 @@ namespace gbe this->ra = NULL; this->ifEndifFix = false; this->regSpillTick = 0; - memset(a0, 0, sizeof(a0)); } GenContext::~GenContext(void) { @@ -340,8 +339,7 @@ namespace gbe p->curr.execWidth = 4; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = 
GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); GenRegister dst_ = dst; dst_.type = GEN_TYPE_UB; dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -385,8 +383,7 @@ namespace gbe p->curr.execWidth = 8; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); for (int i = 1; i < 4; i++) { ind_src.addr_imm += 8; @@ -430,8 +427,7 @@ namespace gbe p->curr.execWidth = 8; p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), - a0[0], new_a0[0] - a0[0]); + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0); p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); for (int i = 1; i < (simd == 8 ? 
2 : 4); i++) { ind_src.addr_imm += 8; @@ -1951,45 +1947,20 @@ namespace gbe } void GenContext::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int sz) { - int16_t diff = new_a0[0] - this->a0[0]; - if (sz == 0) sz = 8; GBE_ASSERT(sz%4 == 0); GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096); - bool need_reset = false; - for (int i = 1; i < sz; i++) { - GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096); - int16_t d = new_a0[i] - this->a0[i]; - if (diff != d) { - need_reset = true; - break; - } - } - GBE_ASSERT(a0[0] + diff < 4096 && a0[0] + diff >= 0); - if (!need_reset && diff >= -512 && diff + max_offset <= 511) { - return; - } else if (!need_reset && sz == 8) { - p->push(); - p->curr.execWidth = 8; - p->curr.predicate = GEN_PREDICATE_NONE; - p->curr.noMask = 1; - p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), - GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), GenRegister::immw(diff)); - p->pop(); - } else { - p->push(); - p->curr.execWidth = 1; - p->curr.predicate = GEN_PREDICATE_NONE; - p->curr.noMask = 1; - for (int i = 0; i < sz/2; i++) { - p->MOV(GenRegister::retype(GenRegister::addr1(i*2), GEN_TYPE_UD), - GenRegister::immud(new_a0[i*2 + 1] << 16 | new_a0[i*2])); - } - p->pop(); + p->push(); + p->curr.execWidth = 1; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + for (int i = 0; i < sz/2; i++) { + p->MOV(GenRegister::retype(GenRegister::addr1(i*2), GEN_TYPE_UD), + GenRegister::immud(new_a0[i*2 + 1] << 16 | new_a0[i*2])); } - memcpy(this->a0, new_a0, sizeof(uint16_t)*sz); + p->pop(); } BVAR(OCL_OUTPUT_REG_ALLOC, false); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 6ca88db1..560248ad 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -208,7 +208,6 @@ namespace gbe /*! allocate a new curbe register and insert to curbe pool. 
*/ void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue = 0); - uint16_t a0[16]; virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0, int sz = 0); private: |