author    Junyan He <junyan.he@intel.com>   2016-04-04 00:22:53 +0800
committer Junyan He <junyan.he@intel.com>   2016-04-04 00:22:53 +0800
commit    bdf98d61c66c98c65f8e17001fd08d11eef7159b (patch)
tree      234499d55409c5965f85b1b990836b1f50735043 /backend
parent    7100f990bf94eb5628051a6f2d858c1b5f6cd1bd (diff)
ndrange
Diffstat (limited to 'backend')
-rw-r--r--  backend/src/driver/cl_gen_kernel.cpp | 32
1 file changed, 32 insertions, 0 deletions
diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp
index 6bd6748f..5b70b197 100644
--- a/backend/src/driver/cl_gen_kernel.cpp
+++ b/backend/src/driver/cl_gen_kernel.cpp
@@ -443,6 +443,34 @@ static cl_int genGPUBindSamplers(GenGPUState& gpuState, cl_kernel kernel, Kernel
  return CL_SUCCESS;
}
+static void genGPUSetStack(GenGPUState& gpuState, Kernel* ker,
+                           cl_device_id device, GenGPUDevice* gpuDev)
+{
+  const int32_t per_lane_stack_sz = ker->getStackSize();
+  const int32_t offset = ker->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+  int32_t stack_sz = per_lane_stack_sz;
+
+  /* No stack required for this kernel */
+  if (per_lane_stack_sz == 0)
+    return;
+
+  /* The stack size is given for *each* SIMD lane. So, we accordingly compute
+   * the size we need for the complete machine.
+   */
+  assert(offset >= 0);
+  stack_sz *= ker->getSIMDWidth();
+  stack_sz *= device->max_compute_unit * gpuDev->max_thread_per_unit;
+
+  /* On HSW the per-thread stack offset is computed relative to its half slice;
+   * if thread scheduling across half slices is unbalanced, the offset can run
+   * out of bounds. GT4 has at most 4 half slices, so multiply by 4 for safety.
+   */
+  if (gpuDev->gen_ver == 75)
+    stack_sz *= 4;
+
+  gpuState.setStack(offset, stack_sz, BTI_PRIVATE);
+}
+
extern "C"
cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim,
const size_t *global_wk_off, const size_t *global_wk_sz,
@@ -528,5 +556,9 @@ cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
  if (err != CL_SUCCESS)
    return err;
+  /* Bind a stack if needed */
+  genGPUSetStack(*ndRange->gpuState, ker, queue->device, gpuDev);
+
+
}
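
As a side note on how the allocation size above is derived, here is a minimal, self-contained sketch (not part of the patch; the helper name computeStackSize and the sample numbers are made up) that mirrors the arithmetic in genGPUSetStack:

#include <cassert>
#include <cstdint>
#include <cstdio>

/* Illustrative only: the real code reads these values from the Kernel and
 * GenGPUDevice structures; here they are plain parameters. */
static int32_t computeStackSize(int32_t perLaneStackSz,   /* bytes per SIMD lane   */
                                int32_t simdWidth,        /* lanes per thread      */
                                int32_t maxComputeUnit,    /* EUs on the device     */
                                int32_t maxThreadPerUnit,  /* HW threads per EU     */
                                int32_t genVer)            /* 75 == Haswell (gen7.5)*/
{
  if (perLaneStackSz == 0)
    return 0; /* kernel uses no private stack */

  /* per-lane size -> per-thread size -> whole-machine size */
  int32_t stackSz = perLaneStackSz * simdWidth;
  stackSz *= maxComputeUnit * maxThreadPerUnit;

  /* HSW computes per-thread stack offsets relative to a half slice, so
   * over-allocate by the maximum half-slice count (4 on GT4). */
  if (genVer == 75)
    stackSz *= 4;

  return stackSz;
}

int main() {
  /* Hypothetical numbers: 1 KB per lane, SIMD16, 20 EUs, 7 threads per EU. */
  int32_t sz = computeStackSize(1024, 16, 20, 7, 75);
  printf("total stack allocation: %d bytes\n", (int)sz);
  return 0;
}

With these hypothetical numbers the device-wide private stack comes out to 1024 * 16 * 20 * 7 * 4 = 9175040 bytes (about 8.75 MB), which is the value that would be passed to setStack along with the curbe offset and BTI_PRIVATE.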