diff options
-rw-r--r-- | src/cl_device_id.c | 30 |
1 files changed, 15 insertions, 15 deletions
diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 522c3c56..5ef0bdea 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -42,8 +42,8 @@ static struct _cl_device_id intel_ivb_gt2_device = { .max_compute_unit = 16, .max_thread_per_unit = 8, .sub_slice_count = 2, - .max_work_item_sizes = {1024, 1024, 1024}, - .max_work_group_size = 1024, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen7_device.h" }; @@ -64,8 +64,8 @@ static struct _cl_device_id intel_baytrail_t_device = { .max_compute_unit = 4, .max_thread_per_unit = 8, .sub_slice_count = 1, - .max_work_item_sizes = {512, 512, 512}, - .max_work_group_size = 512, + .max_work_item_sizes = {256, 256, 256}, + .max_work_group_size = 256, .max_clock_frequency = 1000, #include "cl_gen7_device.h" }; @@ -76,8 +76,8 @@ static struct _cl_device_id intel_hsw_gt1_device = { .max_compute_unit = 10, .max_thread_per_unit = 7, .sub_slice_count = 1, - .max_work_item_sizes = {1024, 1024, 1024}, - .max_work_group_size = 1024, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; @@ -87,8 +87,8 @@ static struct _cl_device_id intel_hsw_gt2_device = { .max_compute_unit = 20, .max_thread_per_unit = 7, .sub_slice_count = 2, - .max_work_item_sizes = {1024, 1024, 1024}, - .max_work_group_size = 1024, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; @@ -98,8 +98,8 @@ static struct _cl_device_id intel_hsw_gt3_device = { .max_compute_unit = 40, .max_thread_per_unit = 7, .sub_slice_count = 4, - .max_work_item_sizes = {1024, 1024, 1024}, - .max_work_group_size = 1024, + .max_work_item_sizes = {512, 512, 512}, + .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; @@ -110,7 +110,7 @@ static struct _cl_device_id intel_brw_gt1_device = { .max_compute_unit = 12, .max_thread_per_unit = 7, .sub_slice_count = 2, - .max_work_item_sizes = {1024, 1024, 1024}, + .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" @@ -121,7 +121,7 @@ static struct _cl_device_id intel_brw_gt2_device = { .max_compute_unit = 24, .max_thread_per_unit = 7, .sub_slice_count = 3, - .max_work_item_sizes = {1024, 1024, 1024}, + .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" @@ -132,7 +132,7 @@ static struct _cl_device_id intel_brw_gt3_device = { .max_compute_unit = 48, .max_thread_per_unit = 7, .sub_slice_count = 6, - .max_work_item_sizes = {1024, 1024, 1024}, + .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" @@ -669,9 +669,9 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel) if(thread_cnt > 64) thread_cnt = 64; work_group_size = thread_cnt * simd_width; - if(work_group_size > kernel->program->ctx->device->max_work_group_size) - work_group_size = kernel->program->ctx->device->max_work_group_size; } + if(work_group_size > kernel->program->ctx->device->max_work_group_size) + work_group_size = kernel->program->ctx->device->max_work_group_size; return work_group_size; } |