summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: rander <rander.wang@intel.com> 2017-06-23 11:02:27 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2017-07-04 09:55:52 +0800
commit: 6fde3e082897975bacde1134c4a8096420fa5ab9 (patch)
tree: ab2937c4a7581acbbf5c01a25e9b3da92d475ba0
parent: f0c3b352c3da283aaf4199136894b1da352832f7 (diff)
Runtime: refine max group size for SKL & KBL
Change the max group size to 256, which is a reasonable size for Gen9. According to performance tests, 256 gives a good improvement in OpenCV with no regressions, so make the change.

Signed-off-by: rander.wang <rander.wang@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- src/cl_device_id.c 18
1 files changed, 9 insertions, 9 deletions
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index b9a60bb1..1960463e 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -149,7 +149,7 @@ static struct _cl_device_id intel_skl_gt1_device = {
.max_thread_per_unit = 7,
.sub_slice_count = 2,
.max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen9_device.h"
};
@@ -159,7 +159,7 @@ static struct _cl_device_id intel_skl_gt2_device = {
.max_thread_per_unit = 7,
.sub_slice_count = 3,
.max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen9_device.h"
};
@@ -169,7 +169,7 @@ static struct _cl_device_id intel_skl_gt3_device = {
.max_thread_per_unit = 7,
.sub_slice_count = 6,
.max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen9_device.h"
};
@@ -179,7 +179,7 @@ static struct _cl_device_id intel_skl_gt4_device = {
.max_thread_per_unit = 7,
.sub_slice_count = 9,
.max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen9_device.h"
};
@@ -209,7 +209,7 @@ static struct _cl_device_id intel_kbl_gt1_device = {
.max_thread_per_unit = 7,
.sub_slice_count = 2,
.max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen9_device.h"
};
@@ -219,7 +219,7 @@ static struct _cl_device_id intel_kbl_gt15_device = {
.max_thread_per_unit = 7,
.sub_slice_count = 3,
.max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen9_device.h"
};
@@ -229,7 +229,7 @@ static struct _cl_device_id intel_kbl_gt2_device = {
.max_thread_per_unit = 7,
.sub_slice_count = 3,
.max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen9_device.h"
};
@@ -239,7 +239,7 @@ static struct _cl_device_id intel_kbl_gt3_device = {
.max_thread_per_unit = 7,
.sub_slice_count = 6,
.max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen9_device.h"
};
@@ -249,7 +249,7 @@ static struct _cl_device_id intel_kbl_gt4_device = {
.max_thread_per_unit = 7,
.sub_slice_count = 9,
.max_work_item_sizes = {512, 512, 512},
- .max_work_group_size = 512,
+ .max_work_group_size = 256,
.max_clock_frequency = 1000,
#include "cl_gen9_device.h"
};