summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/cl_device_id.c 30
1 file changed, 15 insertions, 15 deletions
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 522c3c56..5ef0bdea 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -42,8 +42,8 @@ static struct _cl_device_id intel_ivb_gt2_device = {
.max_compute_unit = 16,
.max_thread_per_unit = 8,
.sub_slice_count = 2,
- .max_work_item_sizes = {1024, 1024, 1024},
- .max_work_group_size = 1024,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
.max_clock_frequency = 1000,
#include "cl_gen7_device.h"
};
@@ -64,8 +64,8 @@ static struct _cl_device_id intel_baytrail_t_device = {
.max_compute_unit = 4,
.max_thread_per_unit = 8,
.sub_slice_count = 1,
- .max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_item_sizes = {256, 256, 256},
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen7_device.h"
};
@@ -76,8 +76,8 @@ static struct _cl_device_id intel_hsw_gt1_device = {
.max_compute_unit = 10,
.max_thread_per_unit = 7,
.sub_slice_count = 1,
- .max_work_item_sizes = {1024, 1024, 1024},
- .max_work_group_size = 1024,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
.max_clock_frequency = 1000,
#include "cl_gen75_device.h"
};
@@ -87,8 +87,8 @@ static struct _cl_device_id intel_hsw_gt2_device = {
.max_compute_unit = 20,
.max_thread_per_unit = 7,
.sub_slice_count = 2,
- .max_work_item_sizes = {1024, 1024, 1024},
- .max_work_group_size = 1024,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
.max_clock_frequency = 1000,
#include "cl_gen75_device.h"
};
@@ -98,8 +98,8 @@ static struct _cl_device_id intel_hsw_gt3_device = {
.max_compute_unit = 40,
.max_thread_per_unit = 7,
.sub_slice_count = 4,
- .max_work_item_sizes = {1024, 1024, 1024},
- .max_work_group_size = 1024,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
.max_clock_frequency = 1000,
#include "cl_gen75_device.h"
};
@@ -110,7 +110,7 @@ static struct _cl_device_id intel_brw_gt1_device = {
.max_compute_unit = 12,
.max_thread_per_unit = 7,
.sub_slice_count = 2,
- .max_work_item_sizes = {1024, 1024, 1024},
+ .max_work_item_sizes = {512, 512, 512},
.max_work_group_size = 512,
.max_clock_frequency = 1000,
#include "cl_gen75_device.h"
@@ -121,7 +121,7 @@ static struct _cl_device_id intel_brw_gt2_device = {
.max_compute_unit = 24,
.max_thread_per_unit = 7,
.sub_slice_count = 3,
- .max_work_item_sizes = {1024, 1024, 1024},
+ .max_work_item_sizes = {512, 512, 512},
.max_work_group_size = 512,
.max_clock_frequency = 1000,
#include "cl_gen75_device.h"
@@ -132,7 +132,7 @@ static struct _cl_device_id intel_brw_gt3_device = {
.max_compute_unit = 48,
.max_thread_per_unit = 7,
.sub_slice_count = 6,
- .max_work_item_sizes = {1024, 1024, 1024},
+ .max_work_item_sizes = {512, 512, 512},
.max_work_group_size = 512,
.max_clock_frequency = 1000,
#include "cl_gen75_device.h"
@@ -669,9 +669,9 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
if(thread_cnt > 64)
thread_cnt = 64;
work_group_size = thread_cnt * simd_width;
- if(work_group_size > kernel->program->ctx->device->max_work_group_size)
- work_group_size = kernel->program->ctx->device->max_work_group_size;
}
+ if(work_group_size > kernel->program->ctx->device->max_work_group_size)
+ work_group_size = kernel->program->ctx->device->max_work_group_size;
return work_group_size;
}