summary | refs | log | tree | commit | diff
path: root/src/cl_command_queue_gen7.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cl_command_queue_gen7.c')
-rw-r--r-- src/cl_command_queue_gen7.c 16
1 files changed, 12 insertions, 4 deletions
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 253c4f2e..4adbd2b2 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -49,23 +49,27 @@ cl_set_varying_payload(const cl_kernel ker,
size_t i, j, k, curr = 0;
int32_t id_offset[3], ip_offset;
cl_int err = CL_SUCCESS;
+ int32_t dw_ip_offset = -1;
id_offset[0] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 0);
id_offset[1] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0);
id_offset[2] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0);
ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0);
+ if (ip_offset < 0)
+ dw_ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_DW_BLOCK_IP, 0);
+ assert(ip_offset < 0 || dw_ip_offset < 0);
assert(id_offset[0] >= 0 &&
id_offset[1] >= 0 &&
id_offset[2] >= 0 &&
- ip_offset >= 0);
+ (ip_offset >= 0 || dw_ip_offset >= 0));
TRY_ALLOC(ids[0], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
TRY_ALLOC(ids[1], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
TRY_ALLOC(ids[2], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
TRY_ALLOC(block_ips, (uint16_t*) alloca(sizeof(uint16_t)*thread_n*simd_sz));
-
/* 0xffff means that the lane is inactivated */
- memset(block_ips, 0xff, sizeof(uint16_t)*thread_n*simd_sz);
+ memset(block_ips, 0xff, sizeof(int16_t)*thread_n*simd_sz);
+
/* Compute the IDs and the block IPs */
for (k = 0; k < local_wk_sz[2]; ++k)
@@ -84,11 +88,15 @@ cl_set_varying_payload(const cl_kernel ker,
uint32_t *ids1 = (uint32_t *) (data + id_offset[1]);
uint32_t *ids2 = (uint32_t *) (data + id_offset[2]);
uint16_t *ips = (uint16_t *) (data + ip_offset);
+ uint32_t *dw_ips = (uint32_t *) (data + dw_ip_offset);
for (j = 0; j < simd_sz; ++j, ++curr) {
ids0[j] = ids[0][curr];
ids1[j] = ids[1][curr];
ids2[j] = ids[2][curr];
- ips[j] = block_ips[curr];
+ if (ip_offset >= 0)
+ ips[j] = block_ips[curr];
+ if (dw_ip_offset >= 0)
+ dw_ips[j] = block_ips[curr];
}
}