summaryrefslogtreecommitdiff
path: root/src/cl_command_queue_gen7.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cl_command_queue_gen7.c')
-rw-r--r--src/cl_command_queue_gen7.c7
1 files changed, 7 insertions, 0 deletions
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 2223f4f4..978650a6 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -244,6 +244,13 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
assert(offset >= 0);
stack_sz *= interp_kernel_get_simd_width(ker->opaque);
stack_sz *= device->max_compute_unit;
+ /* On HSW the per-thread stack offset is calculated relative to the half slice,
so when threads are not scheduled evenly across half slices the offset can go
out of bounds. GT4 has at most 4 half slices, so multiply the stack size by 4 to be safe.
+ */
+ if(cl_driver_get_ver(ctx->drv) == 75)
+ stack_sz *= 4;
+
cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cl_gpgpu_get_cache_ctrl());
}