summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pan Xiuli <xiuli.pan@intel.com> 2017-06-15 16:44:49 +0800
committer Yang Rong <rong.r.yang@intel.com> 2017-06-16 16:34:32 +0800
commit 521ac708db7f6b679aa32c7fced3ee953ae61867 (patch)
tree fecf695edafad90fdd762dac75d79c05e8bfa2e3
parent 82bc42144542322e4db59c64b2382591e7f7e943 (diff)
Runtime: Add new API enums for cl_intel_required_subgroup_size extension
Add CL_DEVICE_SUB_GROUP_SIZES_INTEL for clGetDeviceInfo, CL_KERNEL_SPILL_MEM_SIZE_INTEL for clGetKernelWorkGroupInfo, and CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL for clGetKernelSubGroupInfo. The extension is only enabled with LLVM versions newer than 4.0, which are needed for frontend support.
V2: Add the opencl-c define.
Signed-off-by: Pan Xiuli <xiuli.pan@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- backend/src/libocl/include/ocl.h 4
-rw-r--r-- include/CL/cl_intel.h 6
-rw-r--r-- src/cl_device_id.c 27
-rw-r--r-- src/cl_device_id.h 2
-rw-r--r-- src/cl_extensions.c 8
-rw-r--r-- src/cl_extensions.h 1
-rw-r--r-- src/cl_gt_device.h 2
7 files changed, 50 insertions, 0 deletions
diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h
index dded5748..5819f8cb 100644
--- a/backend/src/libocl/include/ocl.h
+++ b/backend/src/libocl/include/ocl.h
@@ -126,6 +126,10 @@
#define cl_intel_planar_yuv
#define cl_intel_media_block_io
+#if __clang_major__*10 + __clang_minor__ > 40
+#define cl_intel_required_subgroup_size
+#endif
+
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
#pragma OPENCL EXTENSION cl_khr_fp16 : disable
#endif
diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h
index 47bae46f..3cb85151 100644
--- a/include/CL/cl_intel.h
+++ b/include/CL/cl_intel.h
@@ -197,6 +197,12 @@ typedef CL_API_ENTRY cl_int
void* /*param_value*/,
size_t* /*param_value_size_ret*/ );
#endif
+
+/* cl_intel_required_subgroup_size extension*/
+#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
+#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
+#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
+
#ifdef __cplusplus
}
#endif
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 6cba2b57..76549a43 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -1377,6 +1377,10 @@ cl_get_device_info(cl_device_id device,
src_ptr = device->driver_version;
src_size = device->driver_version_sz;
break;
+ case CL_DEVICE_SUB_GROUP_SIZES_INTEL:
+ src_ptr = device->sub_group_sizes;
+ src_size = device->sub_group_sizes_sz;
+ break;
default:
return CL_INVALID_VALUE;
@@ -1520,6 +1524,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
DECL_FIELD(COMPILE_WORK_GROUP_SIZE, kernel->compile_wg_sz)
DECL_FIELD(PRIVATE_MEM_SIZE, kernel->stack_size)
case CL_KERNEL_GLOBAL_WORK_SIZE:
+ {
dimension = cl_check_builtin_kernel_dimension(kernel, device);
if ( !dimension ) return CL_INVALID_VALUE;
if (param_value_size_ret != NULL)
@@ -1537,6 +1542,18 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
return CL_SUCCESS;
}
return CL_SUCCESS;
+ }
+ case CL_KERNEL_SPILL_MEM_SIZE_INTEL:
+ {
+ if (param_value && param_value_size < sizeof(cl_ulong))
+ return CL_INVALID_VALUE;
+ if (param_value_size_ret != NULL)
+ *param_value_size_ret = sizeof(cl_ulong);
+ if (param_value)
+ *(cl_ulong*)param_value = (cl_ulong)interp_kernel_get_scratch_size(kernel->opaque);
+ return CL_SUCCESS;
+ }
+
default:
return CL_INVALID_VALUE;
};
@@ -1620,6 +1637,16 @@ cl_get_kernel_subgroup_info(cl_kernel kernel,
}
break;
}
+ case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL:
+ {
+ if (param_value && param_value_size < sizeof(size_t))
+ return CL_INVALID_VALUE;
+ if (param_value_size_ret != NULL)
+ *param_value_size_ret = sizeof(size_t);
+ if (param_value)
+ *(size_t*)param_value = interp_kernel_get_simd_width(kernel->opaque);
+ return CL_SUCCESS;
+ }
default:
return CL_INVALID_VALUE;
};
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 6b8f2ebe..93bd2f1f 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -136,6 +136,8 @@ struct _cl_device_id {
uint32_t atomic_test_result;
cl_uint image_pitch_alignment;
cl_uint image_base_address_alignment;
+ size_t sub_group_sizes[2];
+ size_t sub_group_sizes_sz;
//inited as NULL, created only when cmrt kernel is used
void* cmrt_device; //realtype: CmDevice*
diff --git a/src/cl_extensions.c b/src/cl_extensions.c
index d49d202f..56099ad0 100644
--- a/src/cl_extensions.c
+++ b/src/cl_extensions.c
@@ -69,8 +69,16 @@ check_intel_extension(cl_extensions_t *extensions)
{
int id;
for(id = INTEL_EXT_START_ID; id <= INTEL_EXT_END_ID; id++)
+ {
if(id != EXT_ID(intel_motion_estimation))
extensions->extensions[id].base.ext_enabled = 1;
+ if(id == EXT_ID(intel_required_subgroup_size))
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR > 40
+ extensions->extensions[id].base.ext_enabled = 1;
+#else
+ extensions->extensions[id].base.ext_enabled = 0;
+#endif
+ }
}
void
diff --git a/src/cl_extensions.h b/src/cl_extensions.h
index af0855e4..bb61c0bc 100644
--- a/src/cl_extensions.h
+++ b/src/cl_extensions.h
@@ -30,6 +30,7 @@
DECL_EXT(intel_motion_estimation) \
DECL_EXT(intel_subgroups) \
DECL_EXT(intel_subgroups_short) \
+ DECL_EXT(intel_required_subgroup_size) \
DECL_EXT(intel_media_block_io) \
DECL_EXT(intel_planar_yuv)
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index ca4f3c57..f6cb5f82 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -153,4 +153,6 @@ DECL_INFO_STRING(spir_versions, "1.2")
.partition_type = {0},
.image_pitch_alignment = 1,
.image_base_address_alignment = 4096,
+.sub_group_sizes = {8, 16},
+.sub_group_sizes_sz = sizeof(size_t) * 2,
.cmrt_device = NULL