summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Guo Yejun <yejun.guo@intel.com> 2015-04-21 11:41:07 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2015-04-24 10:31:33 +0800
commit: 325e1e8e3aa2495081c77c4a129fc5743816e52d (patch)
tree: 85fea1f03f8d0065e81c6ba9e757c34d647adf66
parent: 5c6220a21dc64c2165c81ae499cf7236093da1b2 (diff)
add simd level function __gen_ocl_get_simd_id
uint __gen_ocl_get_simd_id(); the return value ranges from 0 to simdsize - 1.

V2: use the function sel.selReg to refine the code.
V3: correct the uniform condition in liveness.cpp.

Signed-off-by: Guo Yejun <yejun.guo@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
-rw-r--r-- backend/src/backend/gen_context.cpp 9
-rw-r--r-- backend/src/backend/gen_insn_selection.cpp 6
-rw-r--r-- backend/src/backend/program.h 1
-rw-r--r-- backend/src/ir/instruction.cpp 1
-rw-r--r-- backend/src/ir/instruction.hpp 2
-rw-r--r-- backend/src/ir/instruction.hxx 1
-rw-r--r-- backend/src/ir/liveness.cpp 5
-rw-r--r-- backend/src/ir/profile.cpp 2
-rw-r--r-- backend/src/ir/profile.hpp 5
-rw-r--r-- backend/src/libocl/tmpl/ocl_simd.tmpl.h 1
-rw-r--r-- backend/src/llvm/llvm_gen_backend.cpp 7
-rw-r--r-- backend/src/llvm/llvm_gen_ocl_function.hxx 1
-rw-r--r-- src/cl_command_queue_gen7.c 8
13 files changed, 46 insertions, 3 deletions
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 684ecaf8..62fd5962 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2013,9 +2013,14 @@ namespace gbe
if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
allocCurbeReg(reg, GBE_CURBE_##PATCH); \
} else
-
+
+ bool needLaneID = false;
fn.foreachInstruction([&](ir::Instruction &insn) {
const uint32_t srcNum = insn.getSrcNum();
+ if (insn.getOpcode() == ir::OP_SIMD_ID) {
+ GBE_ASSERT(srcNum == 0);
+ needLaneID = true;
+ }
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const ir::Register reg = insn.getSrc(srcID);
if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
@@ -2054,6 +2059,8 @@ namespace gbe
});
#undef INSERT_REG
+ if (needLaneID)
+ allocCurbeReg(laneid, GBE_CURBE_LANE_ID);
// After this point the vector is immutable. Sorting it will make
// research faster
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 026a858e..19a3c24b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2137,6 +2137,12 @@ namespace gbe
sel.MOV(dst, src);
}
break;
+ case ir::OP_SIMD_ID:
+ {
+ const GenRegister selLaneID = sel.selReg(ir::ocl::laneid, ir::TYPE_U32);
+ sel.MOV(dst, selLaneID);
+ }
+ break;
default: NOT_SUPPORTED;
}
sel.pop();
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 554fb16d..8c171f57 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -101,6 +101,7 @@ enum gbe_curbe_type {
GBE_CURBE_THREAD_NUM,
GBE_CURBE_ZERO,
GBE_CURBE_ONE,
+ GBE_CURBE_LANE_ID,
GBE_CURBE_SLM_OFFSET,
};
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 86148bca..7723b90c 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1614,6 +1614,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
}
DECL_EMIT_FUNCTION(SIMD_SIZE)
+ DECL_EMIT_FUNCTION(SIMD_ID)
#undef DECL_EMIT_FUNCTION
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index c603d9e2..436bfd28 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -572,6 +572,8 @@ namespace ir {
Instruction ALU0(Opcode opcode, Type type, Register dst);
/*! simd_size.type dst */
Instruction SIMD_SIZE(Type type, Register dst);
+ /*! simd_id.type dst */
+ Instruction SIMD_ID(Type type, Register dst);
/*! alu1.type dst src */
Instruction ALU1(Opcode opcode, Type type, Register dst, Register src);
/*! mov.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index f86cfbb5..3f08a923 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -26,6 +26,7 @@
* \author Benjamin Segovia <benjamin.segovia@intel.com>
*/
DECL_INSN(SIMD_SIZE, NullaryInstruction)
+DECL_INSN(SIMD_ID, NullaryInstruction)
DECL_INSN(MOV, UnaryInstruction)
DECL_INSN(COS, UnaryInstruction)
DECL_INSN(SIN, UnaryInstruction)
diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp
index 2b1ffdbb..9fa7ac39 100644
--- a/backend/src/ir/liveness.cpp
+++ b/backend/src/ir/liveness.cpp
@@ -66,6 +66,11 @@ namespace ir {
const uint32_t srcNum = insn.getSrcNum();
const uint32_t dstNum = insn.getDstNum();
bool uniform = true;
+
+ //do not change dst uniform for simd id
+ if (insn.getOpcode() == ir::OP_SIMD_ID)
+ uniform = false;
+
for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
const Register reg = insn.getSrc(srcID);
if (!fn.isUniformRegister(reg))
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index ec7ab94f..2f6539a0 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -44,6 +44,7 @@ namespace ir {
"retVal", "slm_offset",
"printf_buffer_pointer", "printf_index_buffer_pointer",
"dwblockip",
+ "lane_id",
"invalid"
};
@@ -88,6 +89,7 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
+ DECL_NEW_REG(FAMILY_DWORD, laneid, 0);
DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
}
#undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 8f69320d..4de6fe0c 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -72,8 +72,9 @@ namespace ir {
static const Register printfbptr = Register(28); // printf buffer address .
static const Register printfiptr = Register(29); // printf index buffer address.
static const Register dwblockip = Register(30); // blockip
- static const Register invalid = Register(31); // used for valid comparation.
- static const uint32_t regNum = 32; // number of special registers
+ static const Register laneid = Register(31); // lane id.
+ static const Register invalid = Register(32); // used for valid comparation.
+ static const uint32_t regNum = 33; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index b9929022..620e329a 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -25,3 +25,4 @@
/////////////////////////////////////////////////////////////////////////////
uint __gen_ocl_get_simd_size(void);
+uint __gen_ocl_get_simd_id(void);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index ac67add5..f46bc793 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2806,6 +2806,7 @@ namespace gbe
case GEN_OCL_SIMD_SIZE:
case GEN_OCL_READ_TM:
case GEN_OCL_REGION:
+ case GEN_OCL_SIMD_ID:
this->newRegister(&I);
break;
case GEN_OCL_PRINTF:
@@ -3461,6 +3462,12 @@ namespace gbe
ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst);
break;
}
+ case GEN_OCL_SIMD_ID:
+ {
+ const ir::Register dst = this->getRegister(&I);
+ ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst);
+ break;
+ }
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 2b151f28..e2bffdea 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -155,6 +155,7 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16)
DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size)
+DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id)
DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 4adbd2b2..e27a2112 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -210,6 +210,14 @@ cl_curbe_fill(cl_kernel ker,
UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
#undef UPLOAD
+ /* __gen_ocl_get_simd_id needs it */
+ if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) {
+ const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
+ uint32_t *laneid = (uint32_t *) (ker->curbe + offset);
+ int32_t i;
+ for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i;
+ }
+
/* Write identity for the stack pointer. This is required by the stack pointer
* computation in the kernel
*/