diff options
Diffstat (limited to 'src/cl_command_queue_gen7.c')
-rw-r--r-- | src/cl_command_queue_gen7.c | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 2223f4f4..978650a6 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -244,6 +244,13 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker) assert(offset >= 0); stack_sz *= interp_kernel_get_simd_width(ker->opaque); stack_sz *= device->max_compute_unit; + /* On HSW the per-thread stack offset is computed relative to the half slice, so + when threads are scheduled unevenly across half slices the stack can go out of + bounds. GT4 has at most 4 half slices, so multiply the stack size by 4 to be safe. + */ + if(cl_driver_get_ver(ctx->drv) == 75) + stack_sz *= 4; + cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cl_gpgpu_get_cache_ctrl()); } |