summaryrefslogtreecommitdiff
path: root/opencl
diff options
context:
space:
mode:
authorDaniel Sabo <DanielSabo@gmail.com>2013-10-17 00:05:15 -0700
committerDaniel Sabo <DanielSabo@gmail.com>2013-10-17 00:06:25 -0700
commit2b4ecb8bda6f1dab9c819e5715f7df9af30b7c40 (patch)
treea7549c589531ba5fd63a47db1a6ef9af3e08656f /opencl
parentfa1e24b588f5df7514faa6efd0521c514c840407 (diff)
edge:laplace: Fix offsets & stride in OpenCL version
Diffstat (limited to 'opencl')
-rw-r--r--opencl/edge-laplace.cl158
-rw-r--r--opencl/edge-laplace.cl.h161
2 files changed, 149 insertions, 170 deletions
diff --git a/opencl/edge-laplace.cl b/opencl/edge-laplace.cl
index 80d37672..f154e129 100644
--- a/opencl/edge-laplace.cl
+++ b/opencl/edge-laplace.cl
@@ -40,54 +40,51 @@ void minmax(float x1, float x2, float x3,
*max_result = fmax(max2, x5);
}
+float4 get_pix(global float4 *in, int x, int y, int rowstride)
+{
+ int idx = x + y * rowstride;
+ return in[idx];
+}
+
kernel void pre_edgelaplace (global float4 *in,
global float4 *out)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
- int src_width = get_global_size(0) + LAPLACE_RADIUS;
- int src_height = get_global_size(1);
-
- int i = gidx + LAPLACE_RADIUS - 1, j = gidy + LAPLACE_RADIUS - 1;
- int gid1d = i + j * src_width;
-
- float pix_fl[4] = {
- in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y,
- in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w
- };
- float pix_fm[4] = {
- in[gid1d - src_width].x, in[gid1d - src_width].y,
- in[gid1d - src_width].z, in[gid1d - src_width].w
- };
- float pix_fr[4] = {
- in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y,
- in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w
- };
- float pix_ml[4] = {
- in[gid1d - 1 ].x, in[gid1d - 1 ].y,
- in[gid1d - 1 ].z, in[gid1d - 1 ].w
- };
- float pix_mm[4] = {
- in[gid1d ].x, in[gid1d ].y,
- in[gid1d ].z, in[gid1d ].w
- };
- float pix_mr[4] = {
- in[gid1d + 1 ].x, in[gid1d + 1 ].y,
- in[gid1d + 1 ].z, in[gid1d + 1 ].w
- };
- float pix_bl[4] = {
- in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y,
- in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w
- };
- float pix_bm[4] = {
- in[gid1d + src_width].x, in[gid1d + src_width].y,
- in[gid1d + src_width].z, in[gid1d + src_width].w
- };
- float pix_br[4] = {
- in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y,
- in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w
- };
+ int src_width = get_global_size(0) + 2;
+ int src_height = get_global_size(1) + 2;
+
+ int i = gidx + 1, j = gidy + 1;
+
+ float4 cur_pix;
+
+ cur_pix = get_pix(in, i - 1, j - 1, src_width);
+ float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 0, j - 1, src_width);
+ float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i + 1, j - 1, src_width);
+ float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 1, j - 0, src_width);
+ float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 0, j - 0, src_width);
+ float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i + 1, j - 0, src_width);
+ float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 1, j + 1, src_width);
+ float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 0, j + 1, src_width);
+ float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i + 1, j + 1, src_width);
+ float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
int c;
float minval, maxval;
@@ -107,7 +104,7 @@ kernel void pre_edgelaplace (global float4 *in,
}
gradient[3] = pix_mm[3];
- out[gid1d] = (float4)
+ out[gidx + gidy * get_global_size(0)] = (float4)
(gradient[0], gradient[1], gradient[2], gradient[3]);
}
@@ -117,48 +114,39 @@ kernel void knl_edgelaplace (global float4 *in,
int gidx = get_global_id(0);
int gidy = get_global_id(1);
- int src_width = get_global_size(0) + LAPLACE_RADIUS * 2;
- int src_height = get_global_size(1);
-
- int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS;
- int gid1d = i + j * src_width;
-
- float pix_fl[4] = {
- in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y,
- in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w
- };
- float pix_fm[4] = {
- in[gid1d - src_width].x, in[gid1d - src_width].y,
- in[gid1d - src_width].z, in[gid1d - src_width].w
- };
- float pix_fr[4] = {
- in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y,
- in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w
- };
- float pix_ml[4] = {
- in[gid1d - 1 ].x, in[gid1d - 1 ].y,
- in[gid1d - 1 ].z, in[gid1d - 1 ].w
- };
- float pix_mm[4] = {
- in[gid1d ].x, in[gid1d ].y,
- in[gid1d ].z, in[gid1d ].w
- };
- float pix_mr[4] = {
- in[gid1d + 1 ].x, in[gid1d + 1 ].y,
- in[gid1d + 1 ].z, in[gid1d + 1 ].w
- };
- float pix_bl[4] = {
- in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y,
- in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w
- };
- float pix_bm[4] = {
- in[gid1d + src_width].x, in[gid1d + src_width].y,
- in[gid1d + src_width].z, in[gid1d + src_width].w
- };
- float pix_br[4] = {
- in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y,
- in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w
- };
+ int src_width = get_global_size(0) + 2;
+ int src_height = get_global_size(1) + 2;
+
+ int i = gidx + 1, j = gidy + 1;
+
+ float4 cur_pix;
+
+ cur_pix = get_pix(in, i - 1, j - 1, src_width);
+ float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 0, j - 1, src_width);
+ float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i + 1, j - 1, src_width);
+ float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 1, j - 0, src_width);
+ float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 0, j - 0, src_width);
+ float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i + 1, j - 0, src_width);
+ float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 1, j + 1, src_width);
+ float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i - 0, j + 1, src_width);
+ float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+ cur_pix = get_pix(in, i + 1, j + 1, src_width);
+ float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
int c;
float value[4];
diff --git a/opencl/edge-laplace.cl.h b/opencl/edge-laplace.cl.h
index f0f7a0cc..5aa630ae 100644
--- a/opencl/edge-laplace.cl.h
+++ b/opencl/edge-laplace.cl.h
@@ -41,54 +41,52 @@ static const char* edge_laplace_cl_source =
" *max_result = fmax(max2, x5); \n"
"} \n"
" \n"
+"float4 get_pix(global float4 *in, int x, int y, int rowstride) \n"
+"{ \n"
+" int idx = x + y * rowstride; \n"
+" return in[idx]; \n"
+"} \n"
+" \n"
"kernel void pre_edgelaplace (global float4 *in, \n"
" global float4 *out) \n"
"{ \n"
" int gidx = get_global_id(0); \n"
" int gidy = get_global_id(1); \n"
" \n"
-" int src_width = get_global_size(0) + LAPLACE_RADIUS; \n"
-" int src_height = get_global_size(1); \n"
-" \n"
-" int i = gidx + LAPLACE_RADIUS - 1, j = gidy + LAPLACE_RADIUS - 1; \n"
-" int gid1d = i + j * src_width; \n"
-" \n"
-" float pix_fl[4] = { \n"
-" in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y, \n"
-" in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w \n"
-" }; \n"
-" float pix_fm[4] = { \n"
-" in[gid1d - src_width].x, in[gid1d - src_width].y, \n"
-" in[gid1d - src_width].z, in[gid1d - src_width].w \n"
-" }; \n"
-" float pix_fr[4] = { \n"
-" in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y, \n"
-" in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w \n"
-" }; \n"
-" float pix_ml[4] = { \n"
-" in[gid1d - 1 ].x, in[gid1d - 1 ].y, \n"
-" in[gid1d - 1 ].z, in[gid1d - 1 ].w \n"
-" }; \n"
-" float pix_mm[4] = { \n"
-" in[gid1d ].x, in[gid1d ].y, \n"
-" in[gid1d ].z, in[gid1d ].w \n"
-" }; \n"
-" float pix_mr[4] = { \n"
-" in[gid1d + 1 ].x, in[gid1d + 1 ].y, \n"
-" in[gid1d + 1 ].z, in[gid1d + 1 ].w \n"
-" }; \n"
-" float pix_bl[4] = { \n"
-" in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y, \n"
-" in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w \n"
-" }; \n"
-" float pix_bm[4] = { \n"
-" in[gid1d + src_width].x, in[gid1d + src_width].y, \n"
-" in[gid1d + src_width].z, in[gid1d + src_width].w \n"
-" }; \n"
-" float pix_br[4] = { \n"
-" in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y, \n"
-" in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w \n"
-" }; \n"
+" int src_width = get_global_size(0) + 2; \n"
+" int src_height = get_global_size(1) + 2; \n"
+" \n"
+" int i = gidx + 1, j = gidy + 1; \n"
+" \n"
+" float4 cur_pix; \n"
+" \n"
+" cur_pix = get_pix(in, i - 1, j - 1, src_width); \n"
+" float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 0, j - 1, src_width); \n"
+" float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i + 1, j - 1, src_width); \n"
+" float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 1, j - 0, src_width); \n"
+" float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 0, j - 0, src_width); \n"
+" float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i + 1, j - 0, src_width); \n"
+" float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 1, j + 1, src_width); \n"
+" float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 0, j + 1, src_width); \n"
+" float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i + 1, j + 1, src_width); \n"
+" float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
" \n"
" int c; \n"
" float minval, maxval; \n"
@@ -108,8 +106,10 @@ static const char* edge_laplace_cl_source =
" } \n"
" gradient[3] = pix_mm[3]; \n"
" \n"
-" out[gid1d] = (float4) \n"
+" \n"
+" out[gidx + gidy * get_global_size(0)] = (float4) \n"
" (gradient[0], gradient[1], gradient[2], gradient[3]); \n"
+" \n"
"} \n"
" \n"
"kernel void knl_edgelaplace (global float4 *in, \n"
@@ -118,48 +118,39 @@ static const char* edge_laplace_cl_source =
" int gidx = get_global_id(0); \n"
" int gidy = get_global_id(1); \n"
" \n"
-" int src_width = get_global_size(0) + LAPLACE_RADIUS * 2; \n"
-" int src_height = get_global_size(1); \n"
-" \n"
-" int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS; \n"
-" int gid1d = i + j * src_width; \n"
-" \n"
-" float pix_fl[4] = { \n"
-" in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y, \n"
-" in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w \n"
-" }; \n"
-" float pix_fm[4] = { \n"
-" in[gid1d - src_width].x, in[gid1d - src_width].y, \n"
-" in[gid1d - src_width].z, in[gid1d - src_width].w \n"
-" }; \n"
-" float pix_fr[4] = { \n"
-" in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y, \n"
-" in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w \n"
-" }; \n"
-" float pix_ml[4] = { \n"
-" in[gid1d - 1 ].x, in[gid1d - 1 ].y, \n"
-" in[gid1d - 1 ].z, in[gid1d - 1 ].w \n"
-" }; \n"
-" float pix_mm[4] = { \n"
-" in[gid1d ].x, in[gid1d ].y, \n"
-" in[gid1d ].z, in[gid1d ].w \n"
-" }; \n"
-" float pix_mr[4] = { \n"
-" in[gid1d + 1 ].x, in[gid1d + 1 ].y, \n"
-" in[gid1d + 1 ].z, in[gid1d + 1 ].w \n"
-" }; \n"
-" float pix_bl[4] = { \n"
-" in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y, \n"
-" in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w \n"
-" }; \n"
-" float pix_bm[4] = { \n"
-" in[gid1d + src_width].x, in[gid1d + src_width].y, \n"
-" in[gid1d + src_width].z, in[gid1d + src_width].w \n"
-" }; \n"
-" float pix_br[4] = { \n"
-" in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y, \n"
-" in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w \n"
-" }; \n"
+" int src_width = get_global_size(0) + 2; \n"
+" int src_height = get_global_size(1) + 2; \n"
+" \n"
+" int i = gidx + 1, j = gidy + 1; \n"
+" \n"
+" float4 cur_pix; \n"
+" \n"
+" cur_pix = get_pix(in, i - 1, j - 1, src_width); \n"
+" float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 0, j - 1, src_width); \n"
+" float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i + 1, j - 1, src_width); \n"
+" float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 1, j - 0, src_width); \n"
+" float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 0, j - 0, src_width); \n"
+" float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i + 1, j - 0, src_width); \n"
+" float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 1, j + 1, src_width); \n"
+" float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i - 0, j + 1, src_width); \n"
+" float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
+" \n"
+" cur_pix = get_pix(in, i + 1, j + 1, src_width); \n"
+" float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
" \n"
" int c; \n"
" float value[4]; \n"