summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Denis Steckelmacher <steckdenis@yahoo.fr>, 2011-07-23 14:16:08 +0200
committer: Denis Steckelmacher <steckdenis@yahoo.fr>, 2011-07-23 14:16:08 +0200
commit: 2d1d93f7a422acbd80fde59112f51e7126920b2b (patch)
tree: b1b09c95ca81a765d9c42c6305413b0bd2c36560 /tests
parent: 1f8b3fec4066e6b01fe234135cecccc4356c582f (diff)
Performance optimization: calculate global_id ahead of time.
This provides a 1.73x speedup when running a simple kernel (one that nevertheless contains two modulo operations). get_global_id is hugely sped up.
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_kernel.cpp 10
1 file changed, 5 insertions, 5 deletions
diff --git a/tests/test_kernel.cpp b/tests/test_kernel.cpp
index 659e66d..98a2898 100644
--- a/tests/test_kernel.cpp
+++ b/tests/test_kernel.cpp
@@ -17,7 +17,7 @@ static const char source[] =
"__kernel void kernel2(__global int *buf) {\n"
" size_t i = get_global_id(0);\n"
"\n"
- " buf[i] = 2 * i;\n"
+ " buf[i % 256] = 2 * (i % 256);\n"
"}\n";
static void native_kernel(void *args)
@@ -53,7 +53,7 @@ START_TEST (test_compiled_kernel)
const char *src = source;
size_t program_len = sizeof(source);
- int buffer[64];
+ int buffer[256];
result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0);
fail_if(
@@ -125,8 +125,8 @@ START_TEST (test_compiled_kernel)
"cannot set kernel argument"
);
- size_t global_size = sizeof(buffer) / sizeof(buffer[0]);
- size_t local_size = global_size / 2;
+ size_t local_size = sizeof(buffer) / sizeof(buffer[0]);
+ size_t global_size = 100000 * local_size;
cl_event event;
bool ok;
@@ -143,7 +143,7 @@ START_TEST (test_compiled_kernel)
);
ok = true;
- for (int i=0; i<global_size; ++i)
+ for (int i=0; i<local_size; ++i)
{
if (buffer[i] != 2 * i)
{