summaryrefslogtreecommitdiff
path: root/opencl
diff options
context:
space:
mode:
Diffstat (limited to 'opencl')
-rw-r--r--opencl/box-blur.cl113
-rw-r--r--opencl/box-blur.cl.h113
2 files changed, 122 insertions, 104 deletions
diff --git a/opencl/box-blur.cl b/opencl/box-blur.cl
index 7ed24759..bb85bcb2 100644
--- a/opencl/box-blur.cl
+++ b/opencl/box-blur.cl
@@ -25,7 +25,6 @@ __kernel void kernel_blur_ver (__global const float4 *aux,
__global float4 *out,
int width, int radius)
{
-
const int out_index = get_global_id(0) * width + get_global_id (1);
int i;
float4 mean;
@@ -33,7 +32,7 @@ __kernel void kernel_blur_ver (__global const float4 *aux,
mean = (float4)(0.0f);
int aux_index = get_global_id(0) * width + get_global_id (1);
- if(get_global_id(1) < width)
+ if (get_global_id(1) < width)
{
for (i=-radius; i <= radius; i++)
{
@@ -46,68 +45,78 @@ __kernel void kernel_blur_ver (__global const float4 *aux,
__kernel
__attribute__((reqd_work_group_size(256,1,1)))
-void kernel_box_blur_fast(const __global float4 *in,
- __global float4 *out,
- __local float4 *column_sum,
- const int width,
- const int height,
- const int radius,
- const int size)
+void kernel_box_blur_fast (__global const float4 *in,
+ __global float4 *out,
+ __local float4 *column_sum,
+ const int width,
+ const int height,
+ const int radius,
+ const int size)
{
- const int local_id0 = get_local_id(0);
- const int twice_radius = 2 * radius;
- const int in_width = twice_radius + width;
- const int in_height = twice_radius + height;
- const float4 area = (float4)( (twice_radius+1) * (twice_radius+1) );
- int column_index_start,column_index_end;
- int y = get_global_id(1) * size;
- const int out_x = get_group_id(0)
- * ( get_local_size(0) - twice_radius ) + local_id0 - radius;
- const int in_x = out_x + radius;
- int tmp_size = size;
- int tmp_index = 0;
- float4 tmp_sum = (float4)0.0f;
- float4 total_sum = (float4)0.0f;
- if( in_x < in_width )
+ const int local_id0 = get_local_id(0);
+ const int twice_radius = 2 * radius;
+ const int in_width = twice_radius + width;
+ const int in_height = twice_radius + height;
+ const float4 area = (float4)( (twice_radius + 1) * (twice_radius + 1) );
+ int column_index_start,column_index_end;
+ int y = get_global_id(1) * size;
+ const int out_x = get_group_id(0)
+ * ( get_local_size(0) - twice_radius ) + local_id0 - radius;
+ const int in_x = out_x + radius;
+ int tmp_size = size;
+ int tmp_index = 0;
+ float4 tmp_sum = (float4)0.0f;
+ float4 total_sum = (float4)0.0f;
+
+ if (in_x < in_width)
{
- column_index_start = y;
- column_index_end = y + twice_radius;
- for( int i=0; i<twice_radius+1; ++i )
- tmp_sum+=in[(y+i)*in_width+in_x];
- column_sum[local_id0] = tmp_sum;
+ column_index_start = y;
+ column_index_end = y + twice_radius;
+
+ for (int i = 0; i < twice_radius + 1; ++i)
+ {
+ tmp_sum += in[(y + i) * in_width + in_x];
+ }
+
+ column_sum[local_id0] = tmp_sum;
}
- barrier( CLK_LOCAL_MEM_FENCE );
+ barrier(CLK_LOCAL_MEM_FENCE);
- while(1)
+ while (1)
{
- if( out_x < width )
+ if (out_x < width)
{
- if( local_id0 >= radius
- && local_id0 < get_local_size(0) - radius )
+ if (local_id0 >= radius && local_id0 < get_local_size(0) - radius)
{
total_sum = (float4)0.0f;
- for( int i=0; i<twice_radius+1; ++i )
- total_sum += column_sum[local_id0-radius+i];
- out[y*width+out_x] = total_sum/area;
+
+ for (int i = 0; i < twice_radius + 1; ++i)
+ {
+ total_sum += column_sum[local_id0 - radius + i];
+ }
+
+ out[y * width + out_x] = total_sum / area;
}
}
- if( --tmp_size ==0 || y == height - 1 )
- break;
- barrier( CLK_LOCAL_MEM_FENCE );
+ if (--tmp_size == 0 || y == height - 1)
+ break;
- ++y;
- if( in_x < in_width )
- {
- tmp_sum = column_sum[local_id0];
- tmp_sum -= in[(column_index_start)*in_width+in_x];
- tmp_sum += in[(column_index_end+1)*in_width+in_x];
- ++column_index_start;
- ++column_index_end;
- column_sum[local_id0] = tmp_sum;
- }
+ barrier(CLK_LOCAL_MEM_FENCE);
- barrier( CLK_LOCAL_MEM_FENCE );
- }
+ ++y;
+
+ if (in_x < in_width)
+ {
+ tmp_sum = column_sum[local_id0];
+ tmp_sum -= in[(column_index_start) * in_width + in_x];
+ tmp_sum += in[(column_index_end + 1) * in_width + in_x];
+ ++column_index_start;
+ ++column_index_end;
+ column_sum[local_id0] = tmp_sum;
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
}
diff --git a/opencl/box-blur.cl.h b/opencl/box-blur.cl.h
index e4585eca..62f98bd2 100644
--- a/opencl/box-blur.cl.h
+++ b/opencl/box-blur.cl.h
@@ -26,7 +26,6 @@ static const char* box_blur_cl_source =
" __global float4 *out, \n"
" int width, int radius) \n"
"{ \n"
-" \n"
" const int out_index = get_global_id(0) * width + get_global_id (1); \n"
" int i; \n"
" float4 mean; \n"
@@ -34,7 +33,7 @@ static const char* box_blur_cl_source =
" mean = (float4)(0.0f); \n"
" int aux_index = get_global_id(0) * width + get_global_id (1); \n"
" \n"
-" if(get_global_id(1) < width) \n"
+" if (get_global_id(1) < width) \n"
" { \n"
" for (i=-radius; i <= radius; i++) \n"
" { \n"
@@ -47,69 +46,79 @@ static const char* box_blur_cl_source =
" \n"
"__kernel \n"
"__attribute__((reqd_work_group_size(256,1,1))) \n"
-"void kernel_box_blur_fast(const __global float4 *in, \n"
-" __global float4 *out, \n"
-" __local float4 *column_sum, \n"
-" const int width, \n"
-" const int height, \n"
-" const int radius, \n"
-" const int size) \n"
+"void kernel_box_blur_fast (__global const float4 *in, \n"
+" __global float4 *out, \n"
+" __local float4 *column_sum, \n"
+" const int width, \n"
+" const int height, \n"
+" const int radius, \n"
+" const int size) \n"
"{ \n"
-" const int local_id0 = get_local_id(0); \n"
-" const int twice_radius = 2 * radius; \n"
-" const int in_width = twice_radius + width; \n"
-" const int in_height = twice_radius + height; \n"
-" const float4 area = (float4)( (twice_radius+1) * (twice_radius+1) ); \n"
-" int column_index_start,column_index_end; \n"
-" int y = get_global_id(1) * size; \n"
-" const int out_x = get_group_id(0) \n"
-" * ( get_local_size(0) - twice_radius ) + local_id0 - radius; \n"
-" const int in_x = out_x + radius; \n"
-" int tmp_size = size; \n"
-" int tmp_index = 0; \n"
-" float4 tmp_sum = (float4)0.0f; \n"
-" float4 total_sum = (float4)0.0f; \n"
-" if( in_x < in_width ) \n"
+" const int local_id0 = get_local_id(0); \n"
+" const int twice_radius = 2 * radius; \n"
+" const int in_width = twice_radius + width; \n"
+" const int in_height = twice_radius + height; \n"
+" const float4 area = (float4)( (twice_radius + 1) * (twice_radius + 1) ); \n"
+" int column_index_start,column_index_end; \n"
+" int y = get_global_id(1) * size; \n"
+" const int out_x = get_group_id(0) \n"
+" * ( get_local_size(0) - twice_radius ) + local_id0 - radius;\n"
+" const int in_x = out_x + radius; \n"
+" int tmp_size = size; \n"
+" int tmp_index = 0; \n"
+" float4 tmp_sum = (float4)0.0f; \n"
+" float4 total_sum = (float4)0.0f; \n"
+" \n"
+" if (in_x < in_width) \n"
" { \n"
-" column_index_start = y; \n"
-" column_index_end = y + twice_radius; \n"
-" for( int i=0; i<twice_radius+1; ++i ) \n"
-" tmp_sum+=in[(y+i)*in_width+in_x]; \n"
-" column_sum[local_id0] = tmp_sum; \n"
+" column_index_start = y; \n"
+" column_index_end = y + twice_radius; \n"
+" \n"
+" for (int i = 0; i < twice_radius + 1; ++i) \n"
+" { \n"
+" tmp_sum += in[(y + i) * in_width + in_x]; \n"
+" } \n"
+" \n"
+" column_sum[local_id0] = tmp_sum; \n"
" } \n"
" \n"
-" barrier( CLK_LOCAL_MEM_FENCE ); \n"
+" barrier(CLK_LOCAL_MEM_FENCE); \n"
" \n"
-" while(1) \n"
+" while (1) \n"
" { \n"
-" if( out_x < width ) \n"
+" if (out_x < width) \n"
" { \n"
-" if( local_id0 >= radius \n"
-" && local_id0 < get_local_size(0) - radius ) \n"
+" if (local_id0 >= radius && local_id0 < get_local_size(0) - radius) \n"
" { \n"
" total_sum = (float4)0.0f; \n"
-" for( int i=0; i<twice_radius+1; ++i ) \n"
-" total_sum += column_sum[local_id0-radius+i]; \n"
-" out[y*width+out_x] = total_sum/area; \n"
+" \n"
+" for (int i = 0; i < twice_radius + 1; ++i) \n"
+" { \n"
+" total_sum += column_sum[local_id0 - radius + i]; \n"
+" } \n"
+" \n"
+" out[y * width + out_x] = total_sum / area; \n"
" } \n"
" } \n"
-" if( --tmp_size ==0 || y == height - 1 ) \n"
-" break; \n"
" \n"
-" barrier( CLK_LOCAL_MEM_FENCE ); \n"
+" if (--tmp_size == 0 || y == height - 1) \n"
+" break; \n"
" \n"
-" ++y; \n"
-" if( in_x < in_width ) \n"
-" { \n"
-" tmp_sum = column_sum[local_id0]; \n"
-" tmp_sum -= in[(column_index_start)*in_width+in_x]; \n"
-" tmp_sum += in[(column_index_end+1)*in_width+in_x]; \n"
-" ++column_index_start; \n"
-" ++column_index_end; \n"
-" column_sum[local_id0] = tmp_sum; \n"
-" } \n"
+" barrier(CLK_LOCAL_MEM_FENCE); \n"
" \n"
-" barrier( CLK_LOCAL_MEM_FENCE ); \n"
-" } \n"
+" ++y; \n"
+" \n"
+" if (in_x < in_width) \n"
+" { \n"
+" tmp_sum = column_sum[local_id0]; \n"
+" tmp_sum -= in[(column_index_start) * in_width + in_x]; \n"
+" tmp_sum += in[(column_index_end + 1) * in_width + in_x]; \n"
+" ++column_index_start; \n"
+" ++column_index_end; \n"
+" column_sum[local_id0] = tmp_sum; \n"
+" } \n"
+" \n"
+" barrier(CLK_LOCAL_MEM_FENCE); \n"
+" } \n"
"} \n"
;