diff options
author | Junyan He <junyan.he@intel.com> | 2016-03-22 14:30:25 +0800 |
---|---|---|
committer | Junyan He <junyan.he@intel.com> | 2016-03-22 14:30:25 +0800 |
commit | fd531dbceb2a01ead062bc1de982d89c412fa021 (patch) | |
tree | 5cec3bb5b0f8166236a5cc21fbf894c0ca85252d | |
parent | 957b2863d8603a6fe4e562b0e293dfea0c184af2 (diff) |
ker
-rw-r--r-- | backend/src/driver/cl_gen_driver.c | 2 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_driver.h | 3 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_kernel.cpp | 46 | ||||
-rw-r--r-- | libclapi/cl_kernel.c | 10 |
4 files changed, 60 insertions, 1 deletions
diff --git a/backend/src/driver/cl_gen_driver.c b/backend/src/driver/cl_gen_driver.c index 51da3228..1f44d03a 100644 --- a/backend/src/driver/cl_gen_driver.c +++ b/backend/src/driver/cl_gen_driver.c @@ -28,4 +28,6 @@ _cl_driver clgenDriver = { .build_program = GenBuildProgram, .get_program_kernel_names = GenGetProgramKernelNames, .release_program = GenReleaseProgram, + .create_kernel = GenCreateKernel, + .release_kernel = GenReleaseKernel, }; diff --git a/backend/src/driver/cl_gen_driver.h b/backend/src/driver/cl_gen_driver.h index a5dca8ab..4dfe3786 100644 --- a/backend/src/driver/cl_gen_driver.h +++ b/backend/src/driver/cl_gen_driver.h @@ -39,6 +39,9 @@ cl_int GenBuildProgram(cl_program program, const cl_device_id device, const char cl_int GenGetProgramKernelNames(cl_program program, const cl_device_id device, char *names, cl_uint name_sz, cl_uint* ret_sz, cl_uint* ker_num); cl_int GenReleaseProgram(cl_program program, const cl_device_id device); +cl_int GenCreateKernel(cl_kernel kernel, const cl_device_id device); +cl_int GenReleaseKernel(cl_kernel kernel, const cl_device_id device); + /*****************************************************************************/ int dri2OpenX11(void); diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp index 86e34c6b..5b0f57a7 100644 --- a/backend/src/driver/cl_gen_kernel.cpp +++ b/backend/src/driver/cl_gen_kernel.cpp @@ -23,6 +23,7 @@ extern "C" { // for the C header files #include "cl_program.h" #include "cl_kernel.h" #include "cl_gen_driver.h" +#include "cl_gen_devices.h" #ifdef __cplusplus } #endif /* __cplusplus */ @@ -33,6 +34,32 @@ extern "C" { // for the C header files using namespace gbe; +static size_t genGetKernelMaxWorkGroupSize(cl_kernel kernel, Kernel* ker, const cl_device_id device) +{ + size_t work_group_size, thread_cnt; + int simd_width = ker->getSIMDWidth(); + GenGPUDevice* gpuDev = reinterpret_cast<GenGPUDevice*>(getGenDevicePrivate(device)); + GBE_ASSERT(gpuDev); + + if (!ker->getUseSLM()) { + if (!IS_BAYTRAIL_T(gpuDev->device_id) || simd_width == 16) + work_group_size = simd_width * 64; + else + work_group_size = device->max_compute_unit * gpuDev->max_thread_per_unit * simd_width; + + } else { + thread_cnt = device->max_compute_unit * + gpuDev->max_thread_per_unit / gpuDev->sub_slice_count; + if(thread_cnt > 64) + thread_cnt = 64; + work_group_size = thread_cnt * simd_width; + } + + if(work_group_size > device->max_work_group_size) + work_group_size = device->max_work_group_size; + return work_group_size; +} + extern "C" cl_int GenCreateKernel(cl_kernel kernel, const cl_device_id device) { @@ -46,11 +73,28 @@ cl_int GenCreateKernel(cl_kernel kernel, const cl_device_id device) if (ker == NULL) return CL_INVALID_KERNEL_NAME; - /* Set kernel parameters. If the parameters have alread been set, check and return + /* Set kernel args. If the arg have alread been set, check and return CL_INVALID_KERNEL_DEFINITION if failed. */ + cl_kernel_workgroup_info wgInfo = &kernel->wg_info[findIndexByDevice(kernel->program->ctx, device)]; + ker->getCompileWorkGroupSize(wgInfo->compile_wg_sz); + wgInfo->local_mem_sz = ker->getSLMSize(); + wgInfo->private_mem_sz = ker->getStackSize(); + wgInfo->work_group_sz = genGetKernelMaxWorkGroupSize(kernel, ker, device); + setGenKernelPrivate(kernel, device, ker); return CL_SUCCESS; } +extern "C" +cl_int GenReleaseKernel(cl_kernel kernel, const cl_device_id device) +{ + Kernel* ker = reinterpret_cast<Kernel*>(getGenKernelPrivate(kernel, device)); + if (ker == NULL) + return CL_INVALID_VALUE; + + setGenKernelPrivate(kernel, device, NULL); + return CL_SUCCESS; +} + diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c index 50875ab1..cea15ec2 100644 --- a/libclapi/cl_kernel.c +++ b/libclapi/cl_kernel.c @@ -251,3 +251,13 @@ error: return kernel; } +cl_int +clReleaseKernel(cl_kernel kernel) +{ + cl_int err = CL_SUCCESS; + CHECK_KERNEL(kernel); + cl_release_kernel(kernel); +error: + return err; +} + |