summaryrefslogtreecommitdiff
path: root/src/cl_command_queue_gen7.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cl_command_queue_gen7.c')
-rw-r--r--src/cl_command_queue_gen7.c37
1 files changed, 37 insertions, 0 deletions
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 2a49ec24..e5198cd5 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -270,6 +270,36 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
}
static int
+cl_bind_profiling(cl_gpgpu gpgpu, uint32_t simd_sz, cl_kernel ker, size_t global_sz, size_t local_sz, uint32_t bti) {
+ int32_t offset;
+ int i = 0;
+ int thread_num;
+ if (simd_sz == 16) {
+ for(i = 0; i < 3; i++) {
+ offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_TIMESTAMP0 + i, 0);
+ assert(offset >= 0);
+ memset(ker->curbe + offset, 0x0, sizeof(uint32_t)*8*2);
+ thread_num = (local_sz + 15)/16;
+ }
+ } else {
+ assert(simd_sz == 8);
+ for(i = 0; i < 5; i++) {
+ offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_TIMESTAMP0 + i, 0);
+ assert(offset >= 0);
+ memset(ker->curbe + offset, 0x0, sizeof(uint32_t)*8);
+ thread_num = (local_sz + 7)/8;
+ }
+ }
+
+ offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_BUF_POINTER, 0);
+ thread_num = thread_num*(global_sz/local_sz);
+ if (cl_gpgpu_set_profiling_buffer(gpgpu, thread_num*128 + 4, offset, bti))
+ return -1;
+
+ return 0;
+}
+
+static int
cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) {
int32_t value = GBE_CURBE_PRINTF_INDEX_POINTER;
int32_t offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0);
@@ -363,6 +393,13 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
if (cl_bind_printf(gpgpu, ker, printf_info, printf_num, global_size) != 0)
goto error;
}
+ if (interp_get_profiling_bti(ker->opaque) != 0) {
+ if (cl_bind_profiling(gpgpu, simd_sz, ker, global_size, local_sz, interp_get_profiling_bti(ker->opaque)))
+ goto error;
+ cl_gpgpu_set_profiling_info(gpgpu, interp_dup_profiling(ker->opaque));
+ } else {
+ cl_gpgpu_set_profiling_info(gpgpu, NULL);
+ }
/* Bind user buffers */
cl_command_queue_bind_surface(queue, ker);