diff options
author | Daniel Sabo <DanielSabo@gmail.com> | 2013-10-17 00:05:15 -0700 |
---|---|---|
committer | Daniel Sabo <DanielSabo@gmail.com> | 2013-10-17 00:06:25 -0700 |
commit | 2b4ecb8bda6f1dab9c819e5715f7df9af30b7c40 (patch) | |
tree | a7549c589531ba5fd63a47db1a6ef9af3e08656f /opencl | |
parent | fa1e24b588f5df7514faa6efd0521c514c840407 (diff) |
edge:laplace: Fix offsets & stride in OpenCL version
Diffstat (limited to 'opencl')
-rw-r--r-- | opencl/edge-laplace.cl | 158 | ||||
-rw-r--r-- | opencl/edge-laplace.cl.h | 161 |
2 files changed, 149 insertions, 170 deletions
diff --git a/opencl/edge-laplace.cl b/opencl/edge-laplace.cl index 80d37672..f154e129 100644 --- a/opencl/edge-laplace.cl +++ b/opencl/edge-laplace.cl @@ -40,54 +40,51 @@ void minmax(float x1, float x2, float x3, *max_result = fmax(max2, x5); } +float4 get_pix(global float4 *in, int x, int y, int rowstride) +{ + int idx = x + y * rowstride; + return in[idx]; +} + kernel void pre_edgelaplace (global float4 *in, global float4 *out) { int gidx = get_global_id(0); int gidy = get_global_id(1); - int src_width = get_global_size(0) + LAPLACE_RADIUS; - int src_height = get_global_size(1); - - int i = gidx + LAPLACE_RADIUS - 1, j = gidy + LAPLACE_RADIUS - 1; - int gid1d = i + j * src_width; - - float pix_fl[4] = { - in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y, - in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w - }; - float pix_fm[4] = { - in[gid1d - src_width].x, in[gid1d - src_width].y, - in[gid1d - src_width].z, in[gid1d - src_width].w - }; - float pix_fr[4] = { - in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y, - in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w - }; - float pix_ml[4] = { - in[gid1d - 1 ].x, in[gid1d - 1 ].y, - in[gid1d - 1 ].z, in[gid1d - 1 ].w - }; - float pix_mm[4] = { - in[gid1d ].x, in[gid1d ].y, - in[gid1d ].z, in[gid1d ].w - }; - float pix_mr[4] = { - in[gid1d + 1 ].x, in[gid1d + 1 ].y, - in[gid1d + 1 ].z, in[gid1d + 1 ].w - }; - float pix_bl[4] = { - in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y, - in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w - }; - float pix_bm[4] = { - in[gid1d + src_width].x, in[gid1d + src_width].y, - in[gid1d + src_width].z, in[gid1d + src_width].w - }; - float pix_br[4] = { - in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y, - in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w - }; + int src_width = get_global_size(0) + 2; + int src_height = get_global_size(1) + 2; + + int i = gidx + 1, j = gidy + 1; + + float4 cur_pix; + + cur_pix = get_pix(in, i - 1, j - 1, src_width); + float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 0, j - 1, src_width); + float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i + 1, j - 1, src_width); + float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 1, j - 0, src_width); + float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 0, j - 0, src_width); + float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i + 1, j - 0, src_width); + float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 1, j + 1, src_width); + float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 0, j + 1, src_width); + float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i + 1, j + 1, src_width); + float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; int c; float minval, maxval; @@ -107,7 +104,7 @@ kernel void pre_edgelaplace (global float4 *in, } gradient[3] = pix_mm[3]; - out[gid1d] = (float4) + out[gidx + gidy * get_global_size(0)] = (float4) (gradient[0], gradient[1], gradient[2], gradient[3]); } @@ -117,48 +114,39 @@ kernel void knl_edgelaplace (global float4 *in, int gidx = get_global_id(0); int gidy = get_global_id(1); - int src_width = get_global_size(0) + LAPLACE_RADIUS * 2; - int src_height = get_global_size(1); - - int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS; - int gid1d = i + j * src_width; - - float pix_fl[4] = { - in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y, - in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w - }; - float pix_fm[4] = { - in[gid1d - src_width].x, in[gid1d - src_width].y, - in[gid1d - src_width].z, in[gid1d - src_width].w - }; - float pix_fr[4] = { - in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y, - in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w - }; - float pix_ml[4] = { - in[gid1d - 1 ].x, in[gid1d - 1 ].y, - in[gid1d - 1 ].z, in[gid1d - 1 ].w - }; - float pix_mm[4] = { - in[gid1d ].x, in[gid1d ].y, - in[gid1d ].z, in[gid1d ].w - }; - float pix_mr[4] = { - in[gid1d + 1 ].x, in[gid1d + 1 ].y, - in[gid1d + 1 ].z, in[gid1d + 1 ].w - }; - float pix_bl[4] = { - in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y, - in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w - }; - float pix_bm[4] = { - in[gid1d + src_width].x, in[gid1d + src_width].y, - in[gid1d + src_width].z, in[gid1d + src_width].w - }; - float pix_br[4] = { - in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y, - in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w - }; + int src_width = get_global_size(0) + 2; + int src_height = get_global_size(1) + 2; + + int i = gidx + 1, j = gidy + 1; + + float4 cur_pix; + + cur_pix = get_pix(in, i - 1, j - 1, src_width); + float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 0, j - 1, src_width); + float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i + 1, j - 1, src_width); + float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 1, j - 0, src_width); + float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 0, j - 0, src_width); + float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i + 1, j - 0, src_width); + float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 1, j + 1, src_width); + float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i - 0, j + 1, src_width); + float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; + + cur_pix = get_pix(in, i + 1, j + 1, src_width); + float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; int c; float value[4]; diff --git a/opencl/edge-laplace.cl.h b/opencl/edge-laplace.cl.h index f0f7a0cc..5aa630ae 100644 --- a/opencl/edge-laplace.cl.h +++ b/opencl/edge-laplace.cl.h @@ -41,54 +41,52 @@ static const char* edge_laplace_cl_source = " *max_result = fmax(max2, x5); \n" "} \n" " \n" +"float4 get_pix(global float4 *in, int x, int y, int rowstride) \n" +"{ \n" +" int idx = x + y * rowstride; \n" +" return in[idx]; \n" +"} \n" +" \n" "kernel void pre_edgelaplace (global float4 *in, \n" " global float4 *out) \n" "{ \n" " int gidx = get_global_id(0); \n" " int gidy = get_global_id(1); \n" " \n" -" int src_width = get_global_size(0) + LAPLACE_RADIUS; \n" -" int src_height = get_global_size(1); \n" -" \n" -" int i = gidx + LAPLACE_RADIUS - 1, j = gidy + LAPLACE_RADIUS - 1; \n" -" int gid1d = i + j * src_width; \n" -" \n" -" float pix_fl[4] = { \n" -" in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y, \n" -" in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w \n" -" }; \n" -" float pix_fm[4] = { \n" -" in[gid1d - src_width].x, in[gid1d - src_width].y, \n" -" in[gid1d - src_width].z, in[gid1d - src_width].w \n" -" }; \n" -" float pix_fr[4] = { \n" -" in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y, \n" -" in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w \n" -" }; \n" -" float pix_ml[4] = { \n" -" in[gid1d - 1 ].x, in[gid1d - 1 ].y, \n" -" in[gid1d - 1 ].z, in[gid1d - 1 ].w \n" -" }; \n" -" float pix_mm[4] = { \n" -" in[gid1d ].x, in[gid1d ].y, \n" -" in[gid1d ].z, in[gid1d ].w \n" -" }; \n" -" float pix_mr[4] = { \n" -" in[gid1d + 1 ].x, in[gid1d + 1 ].y, \n" -" in[gid1d + 1 ].z, in[gid1d + 1 ].w \n" -" }; \n" -" float pix_bl[4] = { \n" -" in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y, \n" -" in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w \n" -" }; \n" -" float pix_bm[4] = { \n" -" in[gid1d + src_width].x, in[gid1d + src_width].y, \n" -" in[gid1d + src_width].z, in[gid1d + src_width].w \n" -" }; \n" -" float pix_br[4] = { \n" -" in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y, \n" -" in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w \n" -" }; \n" +" int src_width = get_global_size(0) + 2; \n" +" int src_height = get_global_size(1) + 2; \n" +" \n" +" int i = gidx + 1, j = gidy + 1; \n" +" \n" +" float4 cur_pix; \n" +" \n" +" cur_pix = get_pix(in, i - 1, j - 1, src_width); \n" +" float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 0, j - 1, src_width); \n" +" float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i + 1, j - 1, src_width); \n" +" float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 1, j - 0, src_width); \n" +" float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 0, j - 0, src_width); \n" +" float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i + 1, j - 0, src_width); \n" +" float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 1, j + 1, src_width); \n" +" float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 0, j + 1, src_width); \n" +" float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i + 1, j + 1, src_width); \n" +" float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" " \n" " int c; \n" " float minval, maxval; \n" @@ -108,8 +106,10 @@ static const char* edge_laplace_cl_source = " } \n" " gradient[3] = pix_mm[3]; \n" " \n" -" out[gid1d] = (float4) \n" +" \n" +" out[gidx + gidy * get_global_size(0)] = (float4) \n" " (gradient[0], gradient[1], gradient[2], gradient[3]); \n" +" \n" "} \n" " \n" "kernel void knl_edgelaplace (global float4 *in, \n" @@ -118,48 +118,39 @@ static const char* edge_laplace_cl_source = " int gidx = get_global_id(0); \n" " int gidy = get_global_id(1); \n" " \n" -" int src_width = get_global_size(0) + LAPLACE_RADIUS * 2; \n" -" int src_height = get_global_size(1); \n" -" \n" -" int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS; \n" -" int gid1d = i + j * src_width; \n" -" \n" -" float pix_fl[4] = { \n" -" in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y, \n" -" in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w \n" -" }; \n" -" float pix_fm[4] = { \n" -" in[gid1d - src_width].x, in[gid1d - src_width].y, \n" -" in[gid1d - src_width].z, in[gid1d - src_width].w \n" -" }; \n" -" float pix_fr[4] = { \n" -" in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y, \n" -" in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w \n" -" }; \n" -" float pix_ml[4] = { \n" -" in[gid1d - 1 ].x, in[gid1d - 1 ].y, \n" -" in[gid1d - 1 ].z, in[gid1d - 1 ].w \n" -" }; \n" -" float pix_mm[4] = { \n" -" in[gid1d ].x, in[gid1d ].y, \n" -" in[gid1d ].z, in[gid1d ].w \n" -" }; \n" -" float pix_mr[4] = { \n" -" in[gid1d + 1 ].x, in[gid1d + 1 ].y, \n" -" in[gid1d + 1 ].z, in[gid1d + 1 ].w \n" -" }; \n" -" float pix_bl[4] = { \n" -" in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y, \n" -" in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w \n" -" }; \n" -" float pix_bm[4] = { \n" -" in[gid1d + src_width].x, in[gid1d + src_width].y, \n" -" in[gid1d + src_width].z, in[gid1d + src_width].w \n" -" }; \n" -" float pix_br[4] = { \n" -" in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y, \n" -" in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w \n" -" }; \n" +" int src_width = get_global_size(0) + 2; \n" +" int src_height = get_global_size(1) + 2; \n" +" \n" +" int i = gidx + 1, j = gidy + 1; \n" +" \n" +" float4 cur_pix; \n" +" \n" +" cur_pix = get_pix(in, i - 1, j - 1, src_width); \n" +" float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 0, j - 1, src_width); \n" +" float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i + 1, j - 1, src_width); \n" +" float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 1, j - 0, src_width); \n" +" float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 0, j - 0, src_width); \n" +" float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i + 1, j - 0, src_width); \n" +" float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 1, j + 1, src_width); \n" +" float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i - 0, j + 1, src_width); \n" +" float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" +" \n" +" cur_pix = get_pix(in, i + 1, j + 1, src_width); \n" +" float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" " \n" " int c; \n" " float value[4]; \n" |