diff options
author | Nanley Chery <nanleychery@gmail.com> | 2014-11-06 01:18:48 -0500 |
---|---|---|
committer | Øyvind Kolås <pippin@gimp.org> | 2015-05-23 23:08:41 +0200 |
commit | 88b582a7b1f0879ab5b40d17a27f62648fb5dcbb (patch) | |
tree | 71f9ea4e2ddfff68a17cbf4d5eda675fef4d89fb /opencl | |
parent | 17c9e8de2fbe783232bfc5590f73eb3e9b07fdc8 (diff) |
edge-laplace: use float4 vectors for pixels
Signed-off-by: Nanley Chery <nanleychery@gmail.com>
Diffstat (limited to 'opencl')
-rw-r--r-- | opencl/edge-laplace.cl | 203 | ||||
-rw-r--r-- | opencl/edge-laplace.cl.h | 203 |
2 files changed, 120 insertions, 286 deletions
diff --git a/opencl/edge-laplace.cl b/opencl/edge-laplace.cl index f154e129..a61f6c23 100644 --- a/opencl/edge-laplace.cl +++ b/opencl/edge-laplace.cl @@ -1,170 +1,87 @@ #define LAPLACE_RADIUS 2 #define EPSILON 1e-5f -void minmax(float x1, float x2, float x3, - float x4, float x5, - float *min_result, - float *max_result) +void minmax(float4 x1, float4 x2, float4 x3, + float4 x4, float4 x5, + float4 *min_result, + float4 *max_result) { - float min1, min2, max1, max2; + // Step 0 + float16 first = (float16)(x1, x2, x3, x4); - if (x1 > x2) - { - max1 = x1; - min1 = x2; - } - else - { - max1 = x2; - min1 = x1; - } + // Step 1 + float8 min1 = fmin(first.hi, first.lo); + float8 max1 = fmax(first.hi, first.lo); - if (x3 > x4) - { - max2 = x3; - min2 = x4; - } - else - { - max2 = x4; - min2 = x3; - } + // Step 2 + float4 min2 = fmin(min1.hi, min1.lo); + float4 max2 = fmax(max1.hi, max1.lo); - if (min1 < min2) - *min_result = fmin(min1, x5); - else - *min_result = fmin(min2, x5); - if (max1 > max2) - *max_result = fmax(max1, x5); - else - *max_result = fmax(max2, x5); + // Step 3 + *min_result = fmin(min2, x5); + *max_result = fmax(max2, x5); } -float4 get_pix(global float4 *in, int x, int y, int rowstride) -{ - int idx = x + y * rowstride; - return in[idx]; -} - -kernel void pre_edgelaplace (global float4 *in, +kernel void pre_edgelaplace (const global float4 *in, global float4 *out) { int gidx = get_global_id(0); int gidy = get_global_id(1); - - int src_width = get_global_size(0) + 2; - int src_height = get_global_size(1) + 2; + int src_width = get_global_size(0) + LAPLACE_RADIUS; + int src_height = get_global_size(1) + LAPLACE_RADIUS; int i = gidx + 1, j = gidy + 1; - float4 cur_pix; - - cur_pix = get_pix(in, i - 1, j - 1, src_width); - float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 0, j - 1, src_width); - float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i + 1, j - 1, src_width); - float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 1, j - 0, src_width); - float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 0, j - 0, src_width); - float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i + 1, j - 0, src_width); - float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 1, j + 1, src_width); - float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 0, j + 1, src_width); - float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i + 1, j + 1, src_width); - float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - int c; - float minval, maxval; - float gradient[4]; - - for (c = 0;c < 3; ++c) - { - minmax(pix_fm[c], pix_bm[c], pix_ml[c], pix_mr[c], - pix_mm[c], &minval, &maxval); - gradient[c] = 0.5f * - fmax((maxval - pix_mm[c]),(pix_mm[c] - minval)); - gradient[c] = - (pix_fl[c] + pix_fm[c] + pix_fr[c] + - pix_bm[c] - 8.0f * pix_mm[c] + pix_br[c] + - pix_ml[c] + pix_mr[c] + pix_bl[c]) < - EPSILON ? -1.0f * gradient[c] : gradient[c]; - } - gradient[3] = pix_mm[3]; - - out[gidx + gidy * get_global_size(0)] = (float4) - (gradient[0], gradient[1], gradient[2], gradient[3]); + float4 pix_fl = in[(i - 1) + (j - 1)*src_width]; + float4 pix_fm = in[(i - 0) + (j - 1)*src_width]; + float4 pix_fr = in[(i + 1) + (j - 1)*src_width]; + float4 pix_ml = in[(i - 1) + (j - 0)*src_width]; + float4 pix_mm = in[(i - 0) + (j - 0)*src_width]; + float4 pix_mr = in[(i + 1) + (j - 0)*src_width]; + float4 pix_bl = in[(i - 1) + (j + 1)*src_width]; + float4 pix_bm = in[(i - 0) + (j + 1)*src_width]; + float4 pix_br = in[(i + 1) + (j + 1)*src_width]; + + float4 minval, maxval; + minmax(pix_fm, pix_bm, pix_ml, pix_mr, + pix_mm, &minval, &maxval); + float4 gradient = fmax((maxval - pix_mm), (pix_mm - minval)) + * select((float4)0.5f, (float4)-0.5f, + (pix_fl + pix_fm + pix_fr + + pix_bm - 8.0f * pix_mm + pix_br + + pix_ml + pix_mr + pix_bl) < EPSILON); + gradient.w = pix_mm.w; + + out[gidx + gidy * get_global_size(0)] = gradient; } -kernel void knl_edgelaplace (global float4 *in, +kernel void knl_edgelaplace (const global float4 *in, global float4 *out) { int gidx = get_global_id(0); int gidy = get_global_id(1); - int src_width = get_global_size(0) + 2; - int src_height = get_global_size(1) + 2; + int src_width = get_global_size(0) + LAPLACE_RADIUS; + int src_height = get_global_size(1) + LAPLACE_RADIUS; int i = gidx + 1, j = gidy + 1; - float4 cur_pix; - - cur_pix = get_pix(in, i - 1, j - 1, src_width); - float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 0, j - 1, src_width); - float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i + 1, j - 1, src_width); - float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 1, j - 0, src_width); - float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 0, j - 0, src_width); - float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i + 1, j - 0, src_width); - float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 1, j + 1, src_width); - float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i - 0, j + 1, src_width); - float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - cur_pix = get_pix(in, i + 1, j + 1, src_width); - float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; - - int c; - float value[4]; - - for (c = 0;c < 3; ++c) - { - float current = pix_mm[c]; - current = - ((current > 0.0f) && - (pix_fl[c] < 0.0f || pix_fm[c] < 0.0f || - pix_fr[c] < 0.0f || pix_ml[c] < 0.0f || - pix_mr[c] < 0.0f || pix_bl[c] < 0.0f || - pix_bm[c] < 0.0f || pix_br[c] < 0.0f ) - ) ? current : 0.0f; - value[c] = current; - } - value[3] = pix_mm[3]; - - out[gidx + gidy * get_global_size(0)] = (float4) - (value[0], value[1], value[2], value[3]); + float4 pix_fl = in[(i - 1) + (j - 1)*src_width]; + float4 pix_fm = in[(i - 0) + (j - 1)*src_width]; + float4 pix_fr = in[(i + 1) + (j - 1)*src_width]; + float4 pix_ml = in[(i - 1) + (j - 0)*src_width]; + float4 pix_mm = in[(i - 0) + (j - 0)*src_width]; + float4 pix_mr = in[(i + 1) + (j - 0)*src_width]; + float4 pix_bl = in[(i - 1) + (j + 1)*src_width]; + float4 pix_bm = in[(i - 0) + (j + 1)*src_width]; + float4 pix_br = in[(i + 1) + (j + 1)*src_width]; + + float4 value = select(0.0f, pix_mm, (pix_mm > 0.0f) && + (pix_fl < 0.0f || pix_fm < 0.0f || + pix_fr < 0.0f || pix_ml < 0.0f || + pix_mr < 0.0f || pix_bl < 0.0f || + pix_bm < 0.0f || pix_br < 0.0f )); + value.w = pix_mm.w; + + out[gidx + gidy * get_global_size(0)] = value; } diff --git a/opencl/edge-laplace.cl.h b/opencl/edge-laplace.cl.h index eaba4b4e..febd2d83 100644 --- a/opencl/edge-laplace.cl.h +++ b/opencl/edge-laplace.cl.h @@ -2,171 +2,88 @@ static const char* edge_laplace_cl_source = "#define LAPLACE_RADIUS 2 \n" "#define EPSILON 1e-5f \n" " \n" -"void minmax(float x1, float x2, float x3, \n" -" float x4, float x5, \n" -" float *min_result, \n" -" float *max_result) \n" +"void minmax(float4 x1, float4 x2, float4 x3, \n" +" float4 x4, float4 x5, \n" +" float4 *min_result, \n" +" float4 *max_result) \n" "{ \n" -" float min1, min2, max1, max2; \n" +" // Step 0 \n" +" float16 first = (float16)(x1, x2, x3, x4); \n" " \n" -" if (x1 > x2) \n" -" { \n" -" max1 = x1; \n" -" min1 = x2; \n" -" } \n" -" else \n" -" { \n" -" max1 = x2; \n" -" min1 = x1; \n" -" } \n" +" // Step 1 \n" +" float8 min1 = fmin(first.hi, first.lo); \n" +" float8 max1 = fmax(first.hi, first.lo); \n" " \n" -" if (x3 > x4) \n" -" { \n" -" max2 = x3; \n" -" min2 = x4; \n" -" } \n" -" else \n" -" { \n" -" max2 = x4; \n" -" min2 = x3; \n" -" } \n" +" // Step 2 \n" +" float4 min2 = fmin(min1.hi, min1.lo); \n" +" float4 max2 = fmax(max1.hi, max1.lo); \n" " \n" -" if (min1 < min2) \n" -" *min_result = fmin(min1, x5); \n" -" else \n" -" *min_result = fmin(min2, x5); \n" -" if (max1 > max2) \n" -" *max_result = fmax(max1, x5); \n" -" else \n" -" *max_result = fmax(max2, x5); \n" +" // Step 3 \n" +" *min_result = fmin(min2, x5); \n" +" *max_result = fmax(max2, x5); \n" "} \n" " \n" -"float4 get_pix(global float4 *in, int x, int y, int rowstride) \n" -"{ \n" -" int idx = x + y * rowstride; \n" -" return in[idx]; \n" -"} \n" -" \n" -"kernel void pre_edgelaplace (global float4 *in, \n" +"kernel void pre_edgelaplace (const global float4 *in, \n" " global float4 *out) \n" "{ \n" " int gidx = get_global_id(0); \n" " int gidy = get_global_id(1); \n" -" \n" -" int src_width = get_global_size(0) + 2; \n" -" int src_height = get_global_size(1) + 2; \n" +" int src_width = get_global_size(0) + LAPLACE_RADIUS; \n" +" int src_height = get_global_size(1) + LAPLACE_RADIUS; \n" " \n" " int i = gidx + 1, j = gidy + 1; \n" " \n" -" float4 cur_pix; \n" -" \n" -" cur_pix = get_pix(in, i - 1, j - 1, src_width); \n" -" float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 0, j - 1, src_width); \n" -" float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i + 1, j - 1, src_width); \n" -" float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 1, j - 0, src_width); \n" -" float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 0, j - 0, src_width); \n" -" float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i + 1, j - 0, src_width); \n" -" float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 1, j + 1, src_width); \n" -" float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 0, j + 1, src_width); \n" -" float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i + 1, j + 1, src_width); \n" -" float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" int c; \n" -" float minval, maxval; \n" -" float gradient[4]; \n" -" \n" -" for (c = 0;c < 3; ++c) \n" -" { \n" -" minmax(pix_fm[c], pix_bm[c], pix_ml[c], pix_mr[c], \n" -" pix_mm[c], &minval, &maxval); \n" -" gradient[c] = 0.5f * \n" -" fmax((maxval - pix_mm[c]),(pix_mm[c] - minval)); \n" -" gradient[c] = \n" -" (pix_fl[c] + pix_fm[c] + pix_fr[c] + \n" -" pix_bm[c] - 8.0f * pix_mm[c] + pix_br[c] + \n" -" pix_ml[c] + pix_mr[c] + pix_bl[c]) < \n" -" EPSILON ? -1.0f * gradient[c] : gradient[c]; \n" -" } \n" -" gradient[3] = pix_mm[3]; \n" -" \n" -" out[gidx + gidy * get_global_size(0)] = (float4) \n" -" (gradient[0], gradient[1], gradient[2], gradient[3]); \n" +" float4 pix_fl = in[(i - 1) + (j - 1)*src_width]; \n" +" float4 pix_fm = in[(i - 0) + (j - 1)*src_width]; \n" +" float4 pix_fr = in[(i + 1) + (j - 1)*src_width]; \n" +" float4 pix_ml = in[(i - 1) + (j - 0)*src_width]; \n" +" float4 pix_mm = in[(i - 0) + (j - 0)*src_width]; \n" +" float4 pix_mr = in[(i + 1) + (j - 0)*src_width]; \n" +" float4 pix_bl = in[(i - 1) + (j + 1)*src_width]; \n" +" float4 pix_bm = in[(i - 0) + (j + 1)*src_width]; \n" +" float4 pix_br = in[(i + 1) + (j + 1)*src_width]; \n" +" \n" +" float4 minval, maxval; \n" +" minmax(pix_fm, pix_bm, pix_ml, pix_mr, \n" +" pix_mm, &minval, &maxval); \n" +" float4 gradient = fmax((maxval - pix_mm), (pix_mm - minval)) \n" +" * select((float4)0.5f, (float4)-0.5f, \n" +" (pix_fl + pix_fm + pix_fr + \n" +" pix_bm - 8.0f * pix_mm + pix_br + \n" +" pix_ml + pix_mr + pix_bl) < EPSILON); \n" +" gradient.w = pix_mm.w; \n" +" \n" +" out[gidx + gidy * get_global_size(0)] = gradient; \n" "} \n" " \n" -"kernel void knl_edgelaplace (global float4 *in, \n" +"kernel void knl_edgelaplace (const global float4 *in, \n" " global float4 *out) \n" "{ \n" " int gidx = get_global_id(0); \n" " int gidy = get_global_id(1); \n" " \n" -" int src_width = get_global_size(0) + 2; \n" -" int src_height = get_global_size(1) + 2; \n" +" int src_width = get_global_size(0) + LAPLACE_RADIUS; \n" +" int src_height = get_global_size(1) + LAPLACE_RADIUS; \n" " \n" " int i = gidx + 1, j = gidy + 1; \n" " \n" -" float4 cur_pix; \n" -" \n" -" cur_pix = get_pix(in, i - 1, j - 1, src_width); \n" -" float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 0, j - 1, src_width); \n" -" float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i + 1, j - 1, src_width); \n" -" float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 1, j - 0, src_width); \n" -" float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 0, j - 0, src_width); \n" -" float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i + 1, j - 0, src_width); \n" -" float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 1, j + 1, src_width); \n" -" float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i - 0, j + 1, src_width); \n" -" float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" cur_pix = get_pix(in, i + 1, j + 1, src_width); \n" -" float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n" -" \n" -" int c; \n" -" float value[4]; \n" -" \n" -" for (c = 0;c < 3; ++c) \n" -" { \n" -" float current = pix_mm[c]; \n" -" current = \n" -" ((current > 0.0f) && \n" -" (pix_fl[c] < 0.0f || pix_fm[c] < 0.0f || \n" -" pix_fr[c] < 0.0f || pix_ml[c] < 0.0f || \n" -" pix_mr[c] < 0.0f || pix_bl[c] < 0.0f || \n" -" pix_bm[c] < 0.0f || pix_br[c] < 0.0f ) \n" -" ) ? current : 0.0f; \n" -" value[c] = current; \n" -" } \n" -" value[3] = pix_mm[3]; \n" -" \n" -" out[gidx + gidy * get_global_size(0)] = (float4) \n" -" (value[0], value[1], value[2], value[3]); \n" +" float4 pix_fl = in[(i - 1) + (j - 1)*src_width]; \n" +" float4 pix_fm = in[(i - 0) + (j - 1)*src_width]; \n" +" float4 pix_fr = in[(i + 1) + (j - 1)*src_width]; \n" +" float4 pix_ml = in[(i - 1) + (j - 0)*src_width]; \n" +" float4 pix_mm = in[(i - 0) + (j - 0)*src_width]; \n" +" float4 pix_mr = in[(i + 1) + (j - 0)*src_width]; \n" +" float4 pix_bl = in[(i - 1) + (j + 1)*src_width]; \n" +" float4 pix_bm = in[(i - 0) + (j + 1)*src_width]; \n" +" float4 pix_br = in[(i + 1) + (j + 1)*src_width]; \n" +" \n" +" float4 value = select(0.0f, pix_mm, (pix_mm > 0.0f) && \n" +" (pix_fl < 0.0f || pix_fm < 0.0f || \n" +" pix_fr < 0.0f || pix_ml < 0.0f || \n" +" pix_mr < 0.0f || pix_bl < 0.0f || \n" +" pix_bm < 0.0f || pix_br < 0.0f )); \n" +" value.w = pix_mm.w; \n" +" \n" +" out[gidx + gidy * get_global_size(0)] = value; \n" "} \n" ; |