diff options
author | Yang Rong <rong.r.yang@intel.com> | 2014-09-29 13:38:36 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-10-10 16:24:49 +0800 |
commit | 4a60269c9aad69e149603bf8f0a9690d96c7a43c (patch) | |
tree | ca83fa84a2b31d93e926742d12ac9c59a3d7518c /src/cl_device_id.c | |
parent | be4150816d512c4bce5446ac860b864a2db876d7 (diff) |
BDW: Add device's sub slice field, for cl_get_kernel_max_wg_sz.
When SLM is enabled, getting the kernel's max workgroup size should return a sub slice's max thread count * SIMD width.
So the device's sub slice information is needed.
Signed-off-by: Yang Rong <rong.r.yang@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Reviewed-by: Junyan He <junyan.he@linux.intel.com>
Diffstat (limited to 'src/cl_device_id.c')
-rw-r--r-- | src/cl_device_id.c | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 9e63e813..a1e3e822 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -40,6 +40,7 @@ static struct _cl_device_id intel_ivb_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 16, .max_thread_per_unit = 8, + .sub_slice_count = 2, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -50,6 +51,7 @@ static struct _cl_device_id intel_ivb_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 6, .max_thread_per_unit = 6, + .sub_slice_count = 1, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, @@ -60,6 +62,7 @@ static struct _cl_device_id intel_baytrail_t_device = { INIT_ICD(dispatch) .max_compute_unit = 4, .max_thread_per_unit = 8, + .sub_slice_count = 1, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, @@ -71,6 +74,7 @@ static struct _cl_device_id intel_hsw_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 10, .max_thread_per_unit = 7, + .sub_slice_count = 1, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -81,6 +85,7 @@ static struct _cl_device_id intel_hsw_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 20, .max_thread_per_unit = 7, + .sub_slice_count = 2, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -91,6 +96,7 @@ static struct _cl_device_id intel_hsw_gt3_device = { INIT_ICD(dispatch) .max_compute_unit = 40, .max_thread_per_unit = 7, + .sub_slice_count = 4, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -102,6 +108,7 @@ static struct _cl_device_id intel_brw_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 12, .max_thread_per_unit = 7, + .sub_slice_count = 2, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -112,6 
+119,7 @@ static struct _cl_device_id intel_brw_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 24, .max_thread_per_unit = 7, + .sub_slice_count = 3, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -122,6 +130,7 @@ static struct _cl_device_id intel_brw_gt3_device = { INIT_ICD(dispatch) .max_compute_unit = 48, .max_thread_per_unit = 7, + .sub_slice_count = 6, .max_work_item_sizes = {1024, 1024, 1024}, .max_work_group_size = 1024, .max_clock_frequency = 1000, @@ -634,8 +643,8 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel) work_group_size = kernel->program->ctx->device->max_compute_unit * kernel->program->ctx->device->max_thread_per_unit * simd_width; } else - work_group_size = kernel->program->ctx->device->max_work_group_size / - (16 / simd_width); + work_group_size = kernel->program->ctx->device->max_compute_unit * simd_width * + kernel->program->ctx->device->max_thread_per_unit / kernel->program->ctx->device->sub_slice_count; return work_group_size; } |