diff options
-rw-r--r-- | backend/src/driver/cl_gen_kernel.cpp | 31 | ||||
-rw-r--r-- | include/cl_driver.h | 1 | ||||
-rw-r--r-- | libclapi/cl_kernel.c | 16 |
3 files changed, 37 insertions, 11 deletions
diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp index cfde43e5..c9ad3a39 100644 --- a/backend/src/driver/cl_gen_kernel.cpp +++ b/backend/src/driver/cl_gen_kernel.cpp @@ -79,19 +79,12 @@ static size_t genGetKernelMaxWorkGroupSize(cl_kernel kernel, Kernel* ker, const } extern "C" -cl_int GenCreateKernel(cl_kernel kernel, const cl_device_id device) +cl_int GenGetWorkgroupInfo(cl_kernel kernel, const cl_device_id device, cl_kernel_workgroup_info wgInfo) { - GBE_ASSERT(getGenKernelPrivate(kernel, device) == NULL); - - Program* p = reinterpret_cast<Program*>(getGenProgramPrivate(kernel->program, device)); - if (p == NULL) - return CL_INVALID_PROGRAM; - - Kernel* ker = p->getKernel(kernel->name); + Kernel* ker = reinterpret_cast<Kernel*>(getGenKernelPrivate(kernel, device)); if (ker == NULL) - return CL_INVALID_KERNEL_NAME; + return CL_INVALID_VALUE; - cl_kernel_workgroup_info wgInfo = &kernel->wg_info[findIndexByDevice(kernel->program->ctx, device)]; ker->getCompileWorkGroupSize(wgInfo->compile_wg_sz); wgInfo->local_mem_sz = ker->getSLMSize(); wgInfo->private_mem_sz = ker->getStackSize(); @@ -108,6 +101,22 @@ cl_int GenCreateKernel(cl_kernel kernel, const cl_device_id device) memcpy(wgInfo->global_work_sz, device->max_3d_global_work_sizes, sizeof(device->max_3d_global_work_sizes)); } + return CL_SUCCESS; +} + +extern "C" +cl_int GenCreateKernel(cl_kernel kernel, const cl_device_id device) +{ + GBE_ASSERT(getGenKernelPrivate(kernel, device) == NULL); + + Program* p = reinterpret_cast<Program*>(getGenProgramPrivate(kernel->program, device)); + if (p == NULL) + return CL_INVALID_PROGRAM; + + Kernel* ker = p->getKernel(kernel->name); + if (ker == NULL) + return CL_INVALID_KERNEL_NAME; + setGenKernelPrivate(kernel, device, ker); return CL_SUCCESS; } @@ -750,7 +759,7 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u } GenGPUWorkItemNDRange* ndRange = GBE_NEW(GenGPUWorkItemNDRange, gpuQueue->bufmgr, gpuQueue->ctx, - gpuDev->device_id, + gpuDev->device_id, event_ret, events, num_events); if (ndRange->gpuState->stateInit( queue->device->max_compute_unit * gpuDev->max_thread_per_unit, cst_sz / 32) != true) { diff --git a/include/cl_driver.h b/include/cl_driver.h index 2ace35ba..17619097 100644 --- a/include/cl_driver.h +++ b/include/cl_driver.h @@ -51,6 +51,7 @@ typedef struct _cl_driver { cl_uint name_sz, cl_uint* ret_sz, cl_uint* ker_num); cl_int (*create_kernel)(cl_kernel kernel, const cl_device_id device); cl_int (*release_kernel)(cl_kernel kernel, const cl_device_id device); + cl_int (*get_workgroup_info)(cl_kernel kernel, const cl_device_id device, cl_kernel_workgroup_info wg_info); cl_int (*get_arg_num)(cl_kernel kernel, const cl_device_id device, cl_uint* ret_num); cl_int (*get_arg_name)(cl_kernel kernel, const cl_device_id device, cl_uint index, char *name, cl_uint name_sz, cl_uint* ret_sz); diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c index 9c47b91f..6cfe5462 100644 --- a/libclapi/cl_kernel.c +++ b/libclapi/cl_kernel.c @@ -270,6 +270,7 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int* char *type_name = NULL; cl_uint name_sz = 0; cl_uint type_name_sz = 0; + _cl_kernel_workgroup_info wg_info; cl_kernel_arg_type type; cl_kernel_arg_address_qualifier qualifier; cl_kernel_arg_access_qualifier access; @@ -308,6 +309,21 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int* } } + // Verify the workgroup info for kernel. + for (i = 0; i < p->ctx->device_num; i++) { + if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device. + continue; + + memset(&wg_info, 0, sizeof(wg_info)); + err = p->ctx->devices[i]->driver->get_workgroup_info(k, p->ctx->devices[i], &wg_info); + if (err != CL_SUCCESS) { + err = CL_INVALID_PROGRAM_EXECUTABLE; + goto error; + } + + memcpy(&k->wg_info[i], &wg_info, sizeof(wg_info)); + } + // Verify and create arguments. for (i = 0; i < p->ctx->device_num; i++) { if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device. |