diff options
author | Pan Xiuli <xiuli.pan@intel.com> | 2017-06-15 16:44:49 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-06-16 16:34:32 +0800 |
commit | 521ac708db7f6b679aa32c7fced3ee953ae61867 (patch) | |
tree | fecf695edafad90fdd762dac75d79c05e8bfa2e3 | |
parent | 82bc42144542322e4db59c64b2382591e7f7e943 (diff) |
Runtime: Add new API enums for cl_intel_required_subgroup_size extension
Add CL_DEVICE_SUB_GROUP_SIZES_INTEL for clGetDeviceInfo, add
CL_KERNEL_SPILL_MEM_SIZE_INTEL for clGetKernelWorkGroupInfo and add
CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL for clGetKernelSubGroupInfo.
The extension is only enabled when building against an LLVM version newer than 4.0, which is required for frontend support.
V2: Add the cl_intel_required_subgroup_size define to the OpenCL C header (ocl.h).
Signed-off-by: Pan Xiuli <xiuli.pan@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/include/ocl.h | 4 | ||||
-rw-r--r-- | include/CL/cl_intel.h | 6 | ||||
-rw-r--r-- | src/cl_device_id.c | 27 | ||||
-rw-r--r-- | src/cl_device_id.h | 2 | ||||
-rw-r--r-- | src/cl_extensions.c | 8 | ||||
-rw-r--r-- | src/cl_extensions.h | 1 | ||||
-rw-r--r-- | src/cl_gt_device.h | 2 |
7 files changed, 50 insertions, 0 deletions
diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h index dded5748..5819f8cb 100644 --- a/backend/src/libocl/include/ocl.h +++ b/backend/src/libocl/include/ocl.h @@ -126,6 +126,10 @@ #define cl_intel_planar_yuv #define cl_intel_media_block_io +#if __clang_major__*10 + __clang_minor__ > 40 +#define cl_intel_required_subgroup_size +#endif + #pragma OPENCL EXTENSION cl_khr_fp64 : disable #pragma OPENCL EXTENSION cl_khr_fp16 : disable #endif diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h index 47bae46f..3cb85151 100644 --- a/include/CL/cl_intel.h +++ b/include/CL/cl_intel.h @@ -197,6 +197,12 @@ typedef CL_API_ENTRY cl_int void* /*param_value*/, size_t* /*param_value_size_ret*/ ); #endif + +/* cl_intel_required_subgroup_size extension*/ +#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 +#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 +#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A + #ifdef __cplusplus } #endif diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 6cba2b57..76549a43 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -1377,6 +1377,10 @@ cl_get_device_info(cl_device_id device, src_ptr = device->driver_version; src_size = device->driver_version_sz; break; + case CL_DEVICE_SUB_GROUP_SIZES_INTEL: + src_ptr = device->sub_group_sizes; + src_size = device->sub_group_sizes_sz; + break; default: return CL_INVALID_VALUE; @@ -1520,6 +1524,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, DECL_FIELD(COMPILE_WORK_GROUP_SIZE, kernel->compile_wg_sz) DECL_FIELD(PRIVATE_MEM_SIZE, kernel->stack_size) case CL_KERNEL_GLOBAL_WORK_SIZE: + { dimension = cl_check_builtin_kernel_dimension(kernel, device); if ( !dimension ) return CL_INVALID_VALUE; if (param_value_size_ret != NULL) @@ -1537,6 +1542,18 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, return CL_SUCCESS; } return CL_SUCCESS; + } + case CL_KERNEL_SPILL_MEM_SIZE_INTEL: + { + if (param_value && param_value_size < sizeof(cl_ulong)) + return CL_INVALID_VALUE; + 
if (param_value_size_ret != NULL) + *param_value_size_ret = sizeof(cl_ulong); + if (param_value) + *(cl_ulong*)param_value = (cl_ulong)interp_kernel_get_scratch_size(kernel->opaque); + return CL_SUCCESS; + } + default: return CL_INVALID_VALUE; }; @@ -1620,6 +1637,16 @@ cl_get_kernel_subgroup_info(cl_kernel kernel, } break; } + case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: + { + if (param_value && param_value_size < sizeof(size_t)) + return CL_INVALID_VALUE; + if (param_value_size_ret != NULL) + *param_value_size_ret = sizeof(size_t); + if (param_value) + *(size_t*)param_value = interp_kernel_get_simd_width(kernel->opaque); + return CL_SUCCESS; + } default: return CL_INVALID_VALUE; }; diff --git a/src/cl_device_id.h b/src/cl_device_id.h index 6b8f2ebe..93bd2f1f 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -136,6 +136,8 @@ struct _cl_device_id { uint32_t atomic_test_result; cl_uint image_pitch_alignment; cl_uint image_base_address_alignment; + size_t sub_group_sizes[2]; + size_t sub_group_sizes_sz; //inited as NULL, created only when cmrt kernel is used void* cmrt_device; //realtype: CmDevice* diff --git a/src/cl_extensions.c b/src/cl_extensions.c index d49d202f..56099ad0 100644 --- a/src/cl_extensions.c +++ b/src/cl_extensions.c @@ -69,8 +69,16 @@ check_intel_extension(cl_extensions_t *extensions) { int id; for(id = INTEL_EXT_START_ID; id <= INTEL_EXT_END_ID; id++) + { if(id != EXT_ID(intel_motion_estimation)) extensions->extensions[id].base.ext_enabled = 1; + if(id == EXT_ID(intel_required_subgroup_size)) +#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR > 40 + extensions->extensions[id].base.ext_enabled = 1; +#else + extensions->extensions[id].base.ext_enabled = 0; +#endif + } } void diff --git a/src/cl_extensions.h b/src/cl_extensions.h index af0855e4..bb61c0bc 100644 --- a/src/cl_extensions.h +++ b/src/cl_extensions.h @@ -30,6 +30,7 @@ DECL_EXT(intel_motion_estimation) \ DECL_EXT(intel_subgroups) \ DECL_EXT(intel_subgroups_short) \ + 
DECL_EXT(intel_required_subgroup_size) \ DECL_EXT(intel_media_block_io) \ DECL_EXT(intel_planar_yuv) diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index ca4f3c57..f6cb5f82 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -153,4 +153,6 @@ DECL_INFO_STRING(spir_versions, "1.2") .partition_type = {0}, .image_pitch_alignment = 1, .image_base_address_alignment = 4096, +.sub_group_sizes = {8, 16}, +.sub_group_sizes_sz = sizeof(size_t) * 2, .cmrt_device = NULL |