diff options
-rw-r--r-- | src/cl_command_queue.c | 8 | ||||
-rw-r--r-- | src/cl_command_queue_gen7.c | 37 | ||||
-rw-r--r-- | src/cl_driver.h | 16 | ||||
-rw-r--r-- | src/cl_driver_defs.c | 5 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 58 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.h | 3 |
6 files changed, 126 insertions, 1 deletions
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 033e7df7..884c8a85 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -229,6 +229,7 @@ cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu) size_t global_wk_sz[3]; size_t outbuf_sz = 0; void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz, &outbuf_sz); + void* profiling_info; if (cl_gpgpu_flush(gpgpu) < 0) return CL_OUT_OF_RESOURCES; @@ -252,6 +253,13 @@ cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu) global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0; cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz); } + + /* If have profiling info, output it. */ + profiling_info = cl_gpgpu_get_profiling_info(gpgpu); + if (profiling_info) { + interp_output_profiling(profiling_info, cl_gpgpu_map_profiling_buffer(gpgpu)); + cl_gpgpu_unmap_profiling_buffer(gpgpu); + } return CL_SUCCESS; } diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 2a49ec24..e5198cd5 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -270,6 +270,36 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker) } static int +cl_bind_profiling(cl_gpgpu gpgpu, uint32_t simd_sz, cl_kernel ker, size_t global_sz, size_t local_sz, uint32_t bti) { + int32_t offset; + int i = 0; + int thread_num; + if (simd_sz == 16) { + for(i = 0; i < 3; i++) { + offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_TIMESTAMP0 + i, 0); + assert(offset >= 0); + memset(ker->curbe + offset, 0x0, sizeof(uint32_t)*8*2); + thread_num = (local_sz + 15)/16; + } + } else { + assert(simd_sz == 8); + for(i = 0; i < 5; i++) { + offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_TIMESTAMP0 + i, 0); + assert(offset >= 0); + memset(ker->curbe + offset, 0x0, sizeof(uint32_t)*8); + thread_num = (local_sz + 7)/8; + } + } + + offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_BUF_POINTER, 0); + thread_num = thread_num*(global_sz/local_sz); + if (cl_gpgpu_set_profiling_buffer(gpgpu, thread_num*128 + 4, offset, bti)) + return -1; + + return 0; +} + +static int cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) { int32_t value = GBE_CURBE_PRINTF_INDEX_POINTER; int32_t offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0); @@ -363,6 +393,13 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, if (cl_bind_printf(gpgpu, ker, printf_info, printf_num, global_size) != 0) goto error; } + if (interp_get_profiling_bti(ker->opaque) != 0) { + if (cl_bind_profiling(gpgpu, simd_sz, ker, global_size, local_sz, interp_get_profiling_bti(ker->opaque))) + goto error; + cl_gpgpu_set_profiling_info(gpgpu, interp_dup_profiling(ker->opaque)); + } else { + cl_gpgpu_set_profiling_info(gpgpu, NULL); + } /* Bind user buffers */ cl_command_queue_bind_surface(queue, ker); diff --git a/src/cl_driver.h b/src/cl_driver.h index 9d986b1f..7081beac 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -273,6 +273,22 @@ extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf; typedef void (cl_gpgpu_unref_batch_buf_cb)(void*); extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf; +/* Set the profiling buffer */ +typedef int (cl_gpgpu_set_profiling_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint8_t); +extern cl_gpgpu_set_profiling_buffer_cb *cl_gpgpu_set_profiling_buffer; + +typedef int (cl_gpgpu_set_profiling_info_cb)(cl_gpgpu, void *); +extern cl_gpgpu_set_profiling_info_cb *cl_gpgpu_set_profiling_info; + +typedef void* (cl_gpgpu_get_profiling_info_cb)(cl_gpgpu); +extern cl_gpgpu_get_profiling_info_cb *cl_gpgpu_get_profiling_info; + +typedef void* (cl_gpgpu_map_profiling_buffer_cb)(cl_gpgpu); +extern cl_gpgpu_map_profiling_buffer_cb *cl_gpgpu_map_profiling_buffer; + +typedef void (cl_gpgpu_unmap_profiling_buffer_cb)(cl_gpgpu); +extern cl_gpgpu_unmap_profiling_buffer_cb *cl_gpgpu_unmap_profiling_buffer; + /* Set the printf buffer */ typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t, uint8_t); extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer; diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index 58c4f8f0..31176a44 100644 --- a/src/cl_driver_defs.c +++ b/src/cl_driver_defs.c @@ -94,6 +94,11 @@ LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp = LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL; LOCAL cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf = NULL; LOCAL cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf = NULL; +LOCAL cl_gpgpu_set_profiling_buffer_cb *cl_gpgpu_set_profiling_buffer = NULL; +LOCAL cl_gpgpu_set_profiling_info_cb *cl_gpgpu_set_profiling_info = NULL; +LOCAL cl_gpgpu_get_profiling_info_cb *cl_gpgpu_get_profiling_info = NULL; +LOCAL cl_gpgpu_map_profiling_buffer_cb *cl_gpgpu_map_profiling_buffer = NULL; +LOCAL cl_gpgpu_unmap_profiling_buffer_cb *cl_gpgpu_unmap_profiling_buffer = NULL; LOCAL cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer = NULL; LOCAL cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer = NULL; LOCAL cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer = NULL; diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 0c34ca97..7f212e2b 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -151,6 +151,8 @@ intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu) drm_intel_bo_unreference(gpgpu->stack_b.bo); if (gpgpu->scratch_b.bo) drm_intel_bo_unreference(gpgpu->scratch_b.bo); + if (gpgpu->profiling_b.bo) + drm_intel_bo_unreference(gpgpu->profiling_b.bo); if(gpgpu->constant_b.bo) drm_intel_bo_unreference(gpgpu->constant_b.bo); @@ -905,6 +907,10 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu, dri_bo_unreference(gpgpu->printf_b.bo); gpgpu->printf_b.bo = NULL; + if (gpgpu->profiling_b.bo) + dri_bo_unreference(gpgpu->profiling_b.bo); + gpgpu->profiling_b.bo = NULL; + /* Set the profile buffer*/ if(gpgpu->time_stamp_b.bo) dri_bo_unreference(gpgpu->time_stamp_b.bo); @@ -2281,6 +2287,35 @@ intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t *event, } static int +intel_gpgpu_set_profiling_buf(intel_gpgpu_t *gpgpu, uint32_t size, uint32_t offset, uint8_t bti) +{ + drm_intel_bo *bo = NULL; + + gpgpu->profiling_b.bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "Profiling buffer", size, 64); + bo = gpgpu->profiling_b.bo; + if (!bo || (drm_intel_bo_map(bo, 1) != 0)) { + fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno)); + return -1; + } + memset(bo->virtual, 0, size); + drm_intel_bo_unmap(bo); + cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)bo, offset, 0, size, bti); + return 0; +} + +static void +intel_gpgpu_set_profiling_info(intel_gpgpu_t *gpgpu, void* profiling_info) +{ + gpgpu->profiling_info = profiling_info; +} + +static void* +intel_gpgpu_get_profiling_info(intel_gpgpu_t *gpgpu) +{ + return gpgpu->profiling_info; +} + +static int intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset, uint8_t bti) { drm_intel_bo *bo = NULL; @@ -2311,6 +2346,24 @@ intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint } static void* +intel_gpgpu_map_profiling_buf(intel_gpgpu_t *gpgpu) +{ + drm_intel_bo *bo = NULL; + bo = gpgpu->profiling_b.bo; + drm_intel_bo_map(bo, 1); + return bo->virtual; +} + +static void +intel_gpgpu_unmap_profiling_buf_addr(intel_gpgpu_t *gpgpu) +{ + drm_intel_bo *bo = NULL; + bo = gpgpu->profiling_b.bo; + drm_intel_bo_unmap(bo); +} + + +static void* intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i) { drm_intel_bo *bo = NULL; @@ -2402,6 +2455,11 @@ intel_set_gpgpu_callbacks(int device_id) cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp; cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf; cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf; + cl_gpgpu_set_profiling_buffer = (cl_gpgpu_set_profiling_buffer_cb *)intel_gpgpu_set_profiling_buf; + cl_gpgpu_set_profiling_info = (cl_gpgpu_set_profiling_info_cb *)intel_gpgpu_set_profiling_info; + cl_gpgpu_get_profiling_info = (cl_gpgpu_get_profiling_info_cb *)intel_gpgpu_get_profiling_info; + cl_gpgpu_map_profiling_buffer = (cl_gpgpu_map_profiling_buffer_cb *)intel_gpgpu_map_profiling_buf; + cl_gpgpu_unmap_profiling_buffer = (cl_gpgpu_unmap_profiling_buffer_cb *)intel_gpgpu_unmap_profiling_buf_addr; cl_gpgpu_set_printf_buffer = (cl_gpgpu_set_printf_buffer_cb *)intel_gpgpu_set_printf_buf; cl_gpgpu_map_printf_buffer = (cl_gpgpu_map_printf_buffer_cb *)intel_gpgpu_map_printf_buf; cl_gpgpu_unmap_printf_buffer = (cl_gpgpu_unmap_printf_buffer_cb *)intel_gpgpu_unmap_printf_buf_addr; diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h index ad7290e2..ccbf2fa4 100644 --- a/src/intel/intel_gpgpu.h +++ b/src/intel/intel_gpgpu.h @@ -46,6 +46,7 @@ struct intel_gpgpu void* ker_opaque; size_t global_wk_sz[3]; void* printf_info; + void* profiling_info; struct intel_driver *drv; struct intel_batchbuffer *batch; cl_gpgpu_kernel *ker; @@ -66,7 +67,7 @@ struct intel_gpgpu struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */ struct { drm_intel_bo *bo; drm_intel_bo *ibo;} printf_b; /* the printf buf and index buf*/ - + struct { drm_intel_bo *bo; } profiling_b; /* the buf for profiling*/ struct { drm_intel_bo *bo; } aux_buf; struct { uint32_t surface_heap_offset; |