diff options
author | Junyan He <junyan.he@linux.intel.com> | 2015-12-01 16:10:37 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-12-14 15:11:50 +0800 |
commit | 52fda87c89d7631e9283ce94ae1ec1b6b3ad5621 (patch) | |
tree | 1d5be551c1f245e5b7105f7031ef4cadcc11d669 | |
parent | eb22b9895c97504c78c5338a6a0354b130cb6d81 (diff) |
Runtime: Add the threadid calculation for curbe.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | src/cl_command_queue_gen7.c | 12 |
1 file changed, 11 insertions, 1 deletion
```diff
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 4d476de5..44db7ed1 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -46,8 +46,9 @@ cl_set_varying_payload(const cl_kernel ker,
 {
   uint32_t *ids[3] = {NULL,NULL,NULL};
   uint16_t *block_ips = NULL;
+  uint32_t *thread_ids = NULL;
   size_t i, j, k, curr = 0;
-  int32_t id_offset[3], ip_offset;
+  int32_t id_offset[3], ip_offset, tid_offset;
   cl_int err = CL_SUCCESS;
   int32_t dw_ip_offset = -1;
 
@@ -55,6 +56,7 @@ cl_set_varying_payload(const cl_kernel ker,
   id_offset[1] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0);
   id_offset[2] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0);
   ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0);
+  tid_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_THREAD_ID, 0);
   if (ip_offset < 0)
     dw_ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_DW_BLOCK_IP, 0);
   assert(ip_offset < 0 || dw_ip_offset < 0);
@@ -67,6 +69,8 @@ cl_set_varying_payload(const cl_kernel ker,
   if (id_offset[2] >= 0)
     TRY_ALLOC(ids[2], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
   TRY_ALLOC(block_ips, (uint16_t*) alloca(sizeof(uint16_t)*thread_n*simd_sz));
+  if (tid_offset >= 0)
+    TRY_ALLOC(thread_ids, (uint32_t*) alloca(sizeof(uint32_t)*thread_n));
 
   /* 0xffff means that the lane is inactivated */
   memset(block_ips, 0xff, sizeof(int16_t)*thread_n*simd_sz);
@@ -82,6 +86,8 @@ cl_set_varying_payload(const cl_kernel ker,
     if (id_offset[2] >= 0)
       ids[2][curr] = k;
     block_ips[curr] = 0;
+    if (thread_ids)
+      thread_ids[curr/simd_sz] = (k*local_wk_sz[2] + j*local_wk_sz[1] + i)/simd_sz;
   }
 
   /* Copy them to the curbe buffer */
@@ -92,6 +98,10 @@ cl_set_varying_payload(const cl_kernel ker,
     uint32_t *ids2 = (uint32_t *) (data + id_offset[2]);
     uint16_t *ips = (uint16_t *) (data + ip_offset);
     uint32_t *dw_ips = (uint32_t *) (data + dw_ip_offset);
+
+    if (thread_ids)
+      *(uint32_t *)(data + tid_offset) = thread_ids[i];
+
     for (j = 0; j < simd_sz; ++j, ++curr) {
       if (id_offset[0] >= 0)
         ids0[j] = ids[0][curr];
```