summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2015-04-14 16:16:45 +0800
committerZhigang Gong <zhigang.gong@intel.com>2015-04-14 15:51:58 +0800
commit761755b3109fa2f5a59408fc8947eedc5c61586f (patch)
tree7d3f61746d32268c59dae7fba8f2dcf951f15248
parent054a9e4e55a7d41a3381d83ed61de074027ec4f4 (diff)
Kill the A0 cache in GenContext.
The a0 value cache in GenContext can only hold the value known at compile time, which may differ from the true offset value at run time when the code generates a backward jump. So just kill the a0 cache; we will use a load vector instruction to optimize it later. Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com>
-rw-r--r--backend/src/backend/gen8_context.cpp54
-rw-r--r--backend/src/backend/gen_context.cpp51
-rw-r--r--backend/src/backend/gen_context.hpp1
3 files changed, 24 insertions, 82 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 920eb3eb..2cdb2482 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -98,8 +98,7 @@ namespace gbe
p->curr.execWidth = 4;
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
- a0[0], new_a0[0] - a0[0]);
+ GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0);
GenRegister dst_ = dst;
dst_.type = GEN_TYPE_UB;
dst_.hstride = GEN_HORIZONTAL_STRIDE_1;
@@ -159,8 +158,7 @@ namespace gbe
p->curr.execWidth = 16;
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
- a0[0], new_a0[0] - a0[0]);
+ GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0);
p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
ind_src.addr_imm += 16;
p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 16), ind_src);
@@ -218,8 +216,7 @@ namespace gbe
p->curr.execWidth = 16;
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
- a0[0], new_a0[0] - a0[0]);
+ GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0);
p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
if (simd == 16) {
ind_src.addr_imm += 16;
@@ -862,46 +859,21 @@ namespace gbe
}
void Gen8Context::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int sz) {
- int16_t diff = new_a0[0] - this->a0[0];
if (sz == 0)
sz = 16;
GBE_ASSERT(sz%4 == 0);
GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096);
- bool need_reset = false;
- for (int i = 1; i < sz; i++) {
- GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096);
- int16_t d = new_a0[i] - this->a0[i];
- if (diff != d) {
- need_reset = true;
- break;
- }
- }
- GBE_ASSERT(this->a0[0] + diff < 4096 && this->a0[0] + diff >= 0);
- if (!need_reset && diff >= -512 && diff + max_offset <= 511) {
- return;
- } else if (!need_reset && sz == 16) {
- p->push();
- p->curr.execWidth = 16;
- p->curr.predicate = GEN_PREDICATE_NONE;
- p->curr.noMask = 1;
- p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
- GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), GenRegister::immw(diff));
- p->pop();
- } else {
- p->push();
- p->curr.execWidth = 1;
- p->curr.predicate = GEN_PREDICATE_NONE;
- p->curr.noMask = 1;
- for (int i = 0; i < sz/4; i++) {
- uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]);
- addr = addr << 32;
- addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]);
- p->MOV(GenRegister::retype(GenRegister::addr1(i*4), GEN_TYPE_UL), GenRegister::immuint64(addr));
- }
- p->pop();
+ p->push();
+ p->curr.execWidth = 1;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ for (int i = 0; i < sz/4; i++) {
+ uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]);
+ addr = addr << 32;
+ addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]);
+ p->MOV(GenRegister::retype(GenRegister::addr1(i*4), GEN_TYPE_UL), GenRegister::immuint64(addr));
}
- memcpy(this->a0, new_a0, sizeof(uint16_t)*sz);
+ p->pop();
}
-
}
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 094e6b49..684ecaf8 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -51,7 +51,6 @@ namespace gbe
this->ra = NULL;
this->ifEndifFix = false;
this->regSpillTick = 0;
- memset(a0, 0, sizeof(a0));
}
GenContext::~GenContext(void) {
@@ -340,8 +339,7 @@ namespace gbe
p->curr.execWidth = 4;
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
- a0[0], new_a0[0] - a0[0]);
+ GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0);
GenRegister dst_ = dst;
dst_.type = GEN_TYPE_UB;
dst_.hstride = GEN_HORIZONTAL_STRIDE_1;
@@ -385,8 +383,7 @@ namespace gbe
p->curr.execWidth = 8;
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
- a0[0], new_a0[0] - a0[0]);
+ GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0);
p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
for (int i = 1; i < 4; i++) {
ind_src.addr_imm += 8;
@@ -430,8 +427,7 @@ namespace gbe
p->curr.execWidth = 8;
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
- a0[0], new_a0[0] - a0[0]);
+ GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0);
p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
for (int i = 1; i < (simd == 8 ? 2 : 4); i++) {
ind_src.addr_imm += 8;
@@ -1951,45 +1947,20 @@ namespace gbe
}
void GenContext::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int sz) {
- int16_t diff = new_a0[0] - this->a0[0];
-
if (sz == 0)
sz = 8;
GBE_ASSERT(sz%4 == 0);
GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096);
- bool need_reset = false;
- for (int i = 1; i < sz; i++) {
- GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096);
- int16_t d = new_a0[i] - this->a0[i];
- if (diff != d) {
- need_reset = true;
- break;
- }
- }
- GBE_ASSERT(a0[0] + diff < 4096 && a0[0] + diff >= 0);
- if (!need_reset && diff >= -512 && diff + max_offset <= 511) {
- return;
- } else if (!need_reset && sz == 8) {
- p->push();
- p->curr.execWidth = 8;
- p->curr.predicate = GEN_PREDICATE_NONE;
- p->curr.noMask = 1;
- p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
- GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W), GenRegister::immw(diff));
- p->pop();
- } else {
- p->push();
- p->curr.execWidth = 1;
- p->curr.predicate = GEN_PREDICATE_NONE;
- p->curr.noMask = 1;
- for (int i = 0; i < sz/2; i++) {
- p->MOV(GenRegister::retype(GenRegister::addr1(i*2), GEN_TYPE_UD),
- GenRegister::immud(new_a0[i*2 + 1] << 16 | new_a0[i*2]));
- }
- p->pop();
+ p->push();
+ p->curr.execWidth = 1;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ for (int i = 0; i < sz/2; i++) {
+ p->MOV(GenRegister::retype(GenRegister::addr1(i*2), GEN_TYPE_UD),
+ GenRegister::immud(new_a0[i*2 + 1] << 16 | new_a0[i*2]));
}
- memcpy(this->a0, new_a0, sizeof(uint16_t)*sz);
+ p->pop();
}
BVAR(OCL_OUTPUT_REG_ALLOC, false);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 6ca88db1..560248ad 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -208,7 +208,6 @@ namespace gbe
/*! allocate a new curbe register and insert to curbe pool. */
void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue = 0);
- uint16_t a0[16];
virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0, int sz = 0);
private: