From 531a6babe6c852bc3ed62849d7640bf2ef4659a0 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Tue, 20 Oct 2015 18:55:54 +0800 Subject: Runtime: Refine ext enable function for platform. We enable fp64 extension just on BDW platform. The platforms before Gen7 will not have fp64 support. We will enable fp64 on gen8 later platforms after this feature is stable. V3: Unify the extension setting for FP16 and FP64. Signed-off-by: Junyan He Reviewed-by: Yang Rong --- src/cl_device_id.c | 27 +++++++++++++++++++-------- src/cl_extensions.c | 44 ++++++++++++++++++++++++++++++++++---------- src/cl_extensions.h | 2 +- 3 files changed, 54 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 4551aa8a..d5b345cd 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -406,7 +406,9 @@ brw_gt1_break: intel_brw_gt1_device.device_id = device_id; intel_brw_gt1_device.platform = cl_get_platform_default(); ret = &intel_brw_gt1_device; - cl_intel_platform_enable_fp16_extension(ret); + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; case PCI_CHIP_BROADWLL_M_GT2: @@ -423,7 +425,9 @@ brw_gt2_break: intel_brw_gt2_device.device_id = device_id; intel_brw_gt2_device.platform = cl_get_platform_default(); ret = &intel_brw_gt2_device; - cl_intel_platform_enable_fp16_extension(ret); + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; case PCI_CHIP_BROADWLL_M_GT3: @@ -442,7 +446,9 @@ brw_gt3_break: intel_brw_gt3_device.device_id = device_id; intel_brw_gt3_device.platform = cl_get_platform_default(); ret = &intel_brw_gt3_device; - cl_intel_platform_enable_fp16_extension(ret); + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); + 
cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; case PCI_CHIP_CHV_0: @@ -454,7 +460,8 @@ chv_break: intel_chv_device.device_id = device_id; intel_chv_device.platform = cl_get_platform_default(); ret = &intel_chv_device; - cl_intel_platform_enable_fp16_extension(ret); + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; @@ -472,7 +479,8 @@ skl_gt1_break: intel_skl_gt1_device.device_id = device_id; intel_skl_gt1_device.platform = cl_get_platform_default(); ret = &intel_skl_gt1_device; - cl_intel_platform_enable_fp16_extension(ret); + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; case PCI_CHIP_SKYLAKE_ULT_GT2: @@ -491,7 +499,8 @@ skl_gt2_break: intel_skl_gt2_device.device_id = device_id; intel_skl_gt2_device.platform = cl_get_platform_default(); ret = &intel_skl_gt2_device; - cl_intel_platform_enable_fp16_extension(ret); + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; case PCI_CHIP_SKYLAKE_ULT_GT3: @@ -504,7 +513,8 @@ skl_gt3_break: intel_skl_gt3_device.device_id = device_id; intel_skl_gt3_device.platform = cl_get_platform_default(); ret = &intel_skl_gt3_device; - cl_intel_platform_enable_fp16_extension(ret); + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; case PCI_CHIP_SKYLAKE_HALO_GT4: @@ -515,7 +525,8 @@ skl_gt4_break: intel_skl_gt4_device.device_id = device_id; intel_skl_gt4_device.platform = cl_get_platform_default(); ret = &intel_skl_gt4_device; - cl_intel_platform_enable_fp16_extension(ret); + cl_intel_platform_get_default_extension(ret); + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); break; case PCI_CHIP_SANDYBRIDGE_BRIDGE: diff --git a/src/cl_extensions.c b/src/cl_extensions.c index 6cb15797..3e714ac3 100644 --- a/src/cl_extensions.c +++ 
b/src/cl_extensions.c @@ -106,24 +106,48 @@ cl_intel_platform_get_default_extension(cl_device_id device) } LOCAL void -cl_intel_platform_enable_fp16_extension(cl_device_id device) +cl_intel_platform_enable_extension(cl_device_id device, uint32_t ext) { - cl_extensions_t new_ext; - cl_platform_id pf = device->platform; int id; + char* ext_str = NULL; + cl_platform_id pf = device->platform; assert(pf); - memcpy(&new_ext, pf->internal_extensions, sizeof(new_ext)); - for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++) { - if (id == EXT_ID(khr_fp16)) - new_ext.extensions[id].base.ext_enabled = 1; + if (id == ext) { + if (!pf->internal_extensions->extensions[id].base.ext_enabled) + ext_str = pf->internal_extensions->extensions[id].base.ext_name; + + break; + } } - process_extension_str(&new_ext); + for(id = BASE_EXT_START_ID; id <= BASE_EXT_END_ID; id++) { + if (id == ext) { + if (!pf->internal_extensions->extensions[id].base.ext_enabled) + ext_str = pf->internal_extensions->extensions[id].base.ext_name; - memcpy((char*)device->extensions, new_ext.ext_str, sizeof(device->extensions)); - device->extensions_sz = strlen(new_ext.ext_str) + 1; + break; + } + } + + /* already enabled, skip. 
*/ + if (strstr(device->extensions, ext_str)) + ext_str = NULL; + + if (ext_str) { + if (device->extensions_sz <= 1) { + memcpy((char*)device->extensions, ext_str, strlen(ext_str)); + device->extensions_sz = strlen(ext_str) + 1; + } else { + assert(device->extensions_sz + 1 + strlen(ext_str) < 256); + *(char*)(device->extensions + device->extensions_sz - 1) = ' '; + memcpy((char*)device->extensions + device->extensions_sz, ext_str, strlen(ext_str)); + device->extensions_sz = device->extensions_sz + strlen(ext_str) + 1; + } + + *(char*)(device->extensions + device->extensions_sz - 1) = 0; + } } LOCAL void diff --git a/src/cl_extensions.h b/src/cl_extensions.h index b4544e25..0006651e 100644 --- a/src/cl_extensions.h +++ b/src/cl_extensions.h @@ -95,6 +95,6 @@ typedef struct cl_extensions { extern void cl_intel_platform_extension_init(cl_platform_id intel_platform); extern void -cl_intel_platform_enable_fp16_extension(cl_device_id device); +cl_intel_platform_enable_extension(cl_device_id device, uint32_t name); extern void cl_intel_platform_get_default_extension(cl_device_id device); -- cgit v1.2.3