summary | refs | log | tree | commit | diff
path: root/opencl
diff options
context:
space:
mode:
author Nanley Chery <nanleychery@gmail.com> 2014-11-06 01:18:48 -0500
committer Øyvind Kolås <pippin@gimp.org> 2015-05-23 23:08:41 +0200
commit 88b582a7b1f0879ab5b40d17a27f62648fb5dcbb (patch)
tree 71f9ea4e2ddfff68a17cbf4d5eda675fef4d89fb /opencl
parent 17c9e8de2fbe783232bfc5590f73eb3e9b07fdc8 (diff)
edge-laplace: use float4 vectors for pixels
Signed-off-by: Nanley Chery <nanleychery@gmail.com>
Diffstat (limited to 'opencl')
-rw-r--r-- opencl/edge-laplace.cl 203
-rw-r--r-- opencl/edge-laplace.cl.h 203
2 files changed, 120 insertions, 286 deletions
diff --git a/opencl/edge-laplace.cl b/opencl/edge-laplace.cl
index f154e129..a61f6c23 100644
--- a/opencl/edge-laplace.cl
+++ b/opencl/edge-laplace.cl
@@ -1,170 +1,87 @@
#define LAPLACE_RADIUS 2
#define EPSILON 1e-5f
-void minmax(float x1, float x2, float x3,
- float x4, float x5,
- float *min_result,
- float *max_result)
+void minmax(float4 x1, float4 x2, float4 x3,
+ float4 x4, float4 x5,
+ float4 *min_result,
+ float4 *max_result)
{
- float min1, min2, max1, max2;
+ // Step 0
+ float16 first = (float16)(x1, x2, x3, x4);
- if (x1 > x2)
- {
- max1 = x1;
- min1 = x2;
- }
- else
- {
- max1 = x2;
- min1 = x1;
- }
+ // Step 1
+ float8 min1 = fmin(first.hi, first.lo);
+ float8 max1 = fmax(first.hi, first.lo);
- if (x3 > x4)
- {
- max2 = x3;
- min2 = x4;
- }
- else
- {
- max2 = x4;
- min2 = x3;
- }
+ // Step 2
+ float4 min2 = fmin(min1.hi, min1.lo);
+ float4 max2 = fmax(max1.hi, max1.lo);
- if (min1 < min2)
- *min_result = fmin(min1, x5);
- else
- *min_result = fmin(min2, x5);
- if (max1 > max2)
- *max_result = fmax(max1, x5);
- else
- *max_result = fmax(max2, x5);
+ // Step 3
+ *min_result = fmin(min2, x5);
+ *max_result = fmax(max2, x5);
}
-float4 get_pix(global float4 *in, int x, int y, int rowstride)
-{
- int idx = x + y * rowstride;
- return in[idx];
-}
-
-kernel void pre_edgelaplace (global float4 *in,
+kernel void pre_edgelaplace (const global float4 *in,
global float4 *out)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
-
- int src_width = get_global_size(0) + 2;
- int src_height = get_global_size(1) + 2;
+ int src_width = get_global_size(0) + LAPLACE_RADIUS;
+ int src_height = get_global_size(1) + LAPLACE_RADIUS;
int i = gidx + 1, j = gidy + 1;
- float4 cur_pix;
-
- cur_pix = get_pix(in, i - 1, j - 1, src_width);
- float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 0, j - 1, src_width);
- float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i + 1, j - 1, src_width);
- float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 1, j - 0, src_width);
- float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 0, j - 0, src_width);
- float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i + 1, j - 0, src_width);
- float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 1, j + 1, src_width);
- float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 0, j + 1, src_width);
- float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i + 1, j + 1, src_width);
- float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- int c;
- float minval, maxval;
- float gradient[4];
-
- for (c = 0;c < 3; ++c)
- {
- minmax(pix_fm[c], pix_bm[c], pix_ml[c], pix_mr[c],
- pix_mm[c], &minval, &maxval);
- gradient[c] = 0.5f *
- fmax((maxval - pix_mm[c]),(pix_mm[c] - minval));
- gradient[c] =
- (pix_fl[c] + pix_fm[c] + pix_fr[c] +
- pix_bm[c] - 8.0f * pix_mm[c] + pix_br[c] +
- pix_ml[c] + pix_mr[c] + pix_bl[c]) <
- EPSILON ? -1.0f * gradient[c] : gradient[c];
- }
- gradient[3] = pix_mm[3];
-
- out[gidx + gidy * get_global_size(0)] = (float4)
- (gradient[0], gradient[1], gradient[2], gradient[3]);
+ float4 pix_fl = in[(i - 1) + (j - 1)*src_width];
+ float4 pix_fm = in[(i - 0) + (j - 1)*src_width];
+ float4 pix_fr = in[(i + 1) + (j - 1)*src_width];
+ float4 pix_ml = in[(i - 1) + (j - 0)*src_width];
+ float4 pix_mm = in[(i - 0) + (j - 0)*src_width];
+ float4 pix_mr = in[(i + 1) + (j - 0)*src_width];
+ float4 pix_bl = in[(i - 1) + (j + 1)*src_width];
+ float4 pix_bm = in[(i - 0) + (j + 1)*src_width];
+ float4 pix_br = in[(i + 1) + (j + 1)*src_width];
+
+ float4 minval, maxval;
+ minmax(pix_fm, pix_bm, pix_ml, pix_mr,
+ pix_mm, &minval, &maxval);
+ float4 gradient = fmax((maxval - pix_mm), (pix_mm - minval))
+ * select((float4)0.5f, (float4)-0.5f,
+ (pix_fl + pix_fm + pix_fr +
+ pix_bm - 8.0f * pix_mm + pix_br +
+ pix_ml + pix_mr + pix_bl) < EPSILON);
+ gradient.w = pix_mm.w;
+
+ out[gidx + gidy * get_global_size(0)] = gradient;
}
-kernel void knl_edgelaplace (global float4 *in,
+kernel void knl_edgelaplace (const global float4 *in,
global float4 *out)
{
int gidx = get_global_id(0);
int gidy = get_global_id(1);
- int src_width = get_global_size(0) + 2;
- int src_height = get_global_size(1) + 2;
+ int src_width = get_global_size(0) + LAPLACE_RADIUS;
+ int src_height = get_global_size(1) + LAPLACE_RADIUS;
int i = gidx + 1, j = gidy + 1;
- float4 cur_pix;
-
- cur_pix = get_pix(in, i - 1, j - 1, src_width);
- float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 0, j - 1, src_width);
- float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i + 1, j - 1, src_width);
- float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 1, j - 0, src_width);
- float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 0, j - 0, src_width);
- float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i + 1, j - 0, src_width);
- float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 1, j + 1, src_width);
- float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i - 0, j + 1, src_width);
- float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- cur_pix = get_pix(in, i + 1, j + 1, src_width);
- float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
-
- int c;
- float value[4];
-
- for (c = 0;c < 3; ++c)
- {
- float current = pix_mm[c];
- current =
- ((current > 0.0f) &&
- (pix_fl[c] < 0.0f || pix_fm[c] < 0.0f ||
- pix_fr[c] < 0.0f || pix_ml[c] < 0.0f ||
- pix_mr[c] < 0.0f || pix_bl[c] < 0.0f ||
- pix_bm[c] < 0.0f || pix_br[c] < 0.0f )
- ) ? current : 0.0f;
- value[c] = current;
- }
- value[3] = pix_mm[3];
-
- out[gidx + gidy * get_global_size(0)] = (float4)
- (value[0], value[1], value[2], value[3]);
+ float4 pix_fl = in[(i - 1) + (j - 1)*src_width];
+ float4 pix_fm = in[(i - 0) + (j - 1)*src_width];
+ float4 pix_fr = in[(i + 1) + (j - 1)*src_width];
+ float4 pix_ml = in[(i - 1) + (j - 0)*src_width];
+ float4 pix_mm = in[(i - 0) + (j - 0)*src_width];
+ float4 pix_mr = in[(i + 1) + (j - 0)*src_width];
+ float4 pix_bl = in[(i - 1) + (j + 1)*src_width];
+ float4 pix_bm = in[(i - 0) + (j + 1)*src_width];
+ float4 pix_br = in[(i + 1) + (j + 1)*src_width];
+
+ float4 value = select(0.0f, pix_mm, (pix_mm > 0.0f) &&
+ (pix_fl < 0.0f || pix_fm < 0.0f ||
+ pix_fr < 0.0f || pix_ml < 0.0f ||
+ pix_mr < 0.0f || pix_bl < 0.0f ||
+ pix_bm < 0.0f || pix_br < 0.0f ));
+ value.w = pix_mm.w;
+
+ out[gidx + gidy * get_global_size(0)] = value;
}
diff --git a/opencl/edge-laplace.cl.h b/opencl/edge-laplace.cl.h
index eaba4b4e..febd2d83 100644
--- a/opencl/edge-laplace.cl.h
+++ b/opencl/edge-laplace.cl.h
@@ -2,171 +2,88 @@ static const char* edge_laplace_cl_source =
"#define LAPLACE_RADIUS 2 \n"
"#define EPSILON 1e-5f \n"
" \n"
-"void minmax(float x1, float x2, float x3, \n"
-" float x4, float x5, \n"
-" float *min_result, \n"
-" float *max_result) \n"
+"void minmax(float4 x1, float4 x2, float4 x3, \n"
+" float4 x4, float4 x5, \n"
+" float4 *min_result, \n"
+" float4 *max_result) \n"
"{ \n"
-" float min1, min2, max1, max2; \n"
+" // Step 0 \n"
+" float16 first = (float16)(x1, x2, x3, x4); \n"
" \n"
-" if (x1 > x2) \n"
-" { \n"
-" max1 = x1; \n"
-" min1 = x2; \n"
-" } \n"
-" else \n"
-" { \n"
-" max1 = x2; \n"
-" min1 = x1; \n"
-" } \n"
+" // Step 1 \n"
+" float8 min1 = fmin(first.hi, first.lo); \n"
+" float8 max1 = fmax(first.hi, first.lo); \n"
" \n"
-" if (x3 > x4) \n"
-" { \n"
-" max2 = x3; \n"
-" min2 = x4; \n"
-" } \n"
-" else \n"
-" { \n"
-" max2 = x4; \n"
-" min2 = x3; \n"
-" } \n"
+" // Step 2 \n"
+" float4 min2 = fmin(min1.hi, min1.lo); \n"
+" float4 max2 = fmax(max1.hi, max1.lo); \n"
" \n"
-" if (min1 < min2) \n"
-" *min_result = fmin(min1, x5); \n"
-" else \n"
-" *min_result = fmin(min2, x5); \n"
-" if (max1 > max2) \n"
-" *max_result = fmax(max1, x5); \n"
-" else \n"
-" *max_result = fmax(max2, x5); \n"
+" // Step 3 \n"
+" *min_result = fmin(min2, x5); \n"
+" *max_result = fmax(max2, x5); \n"
"} \n"
" \n"
-"float4 get_pix(global float4 *in, int x, int y, int rowstride) \n"
-"{ \n"
-" int idx = x + y * rowstride; \n"
-" return in[idx]; \n"
-"} \n"
-" \n"
-"kernel void pre_edgelaplace (global float4 *in, \n"
+"kernel void pre_edgelaplace (const global float4 *in, \n"
" global float4 *out) \n"
"{ \n"
" int gidx = get_global_id(0); \n"
" int gidy = get_global_id(1); \n"
-" \n"
-" int src_width = get_global_size(0) + 2; \n"
-" int src_height = get_global_size(1) + 2; \n"
+" int src_width = get_global_size(0) + LAPLACE_RADIUS; \n"
+" int src_height = get_global_size(1) + LAPLACE_RADIUS; \n"
" \n"
" int i = gidx + 1, j = gidy + 1; \n"
" \n"
-" float4 cur_pix; \n"
-" \n"
-" cur_pix = get_pix(in, i - 1, j - 1, src_width); \n"
-" float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 0, j - 1, src_width); \n"
-" float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i + 1, j - 1, src_width); \n"
-" float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 1, j - 0, src_width); \n"
-" float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 0, j - 0, src_width); \n"
-" float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i + 1, j - 0, src_width); \n"
-" float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 1, j + 1, src_width); \n"
-" float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 0, j + 1, src_width); \n"
-" float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i + 1, j + 1, src_width); \n"
-" float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" int c; \n"
-" float minval, maxval; \n"
-" float gradient[4]; \n"
-" \n"
-" for (c = 0;c < 3; ++c) \n"
-" { \n"
-" minmax(pix_fm[c], pix_bm[c], pix_ml[c], pix_mr[c], \n"
-" pix_mm[c], &minval, &maxval); \n"
-" gradient[c] = 0.5f * \n"
-" fmax((maxval - pix_mm[c]),(pix_mm[c] - minval)); \n"
-" gradient[c] = \n"
-" (pix_fl[c] + pix_fm[c] + pix_fr[c] + \n"
-" pix_bm[c] - 8.0f * pix_mm[c] + pix_br[c] + \n"
-" pix_ml[c] + pix_mr[c] + pix_bl[c]) < \n"
-" EPSILON ? -1.0f * gradient[c] : gradient[c]; \n"
-" } \n"
-" gradient[3] = pix_mm[3]; \n"
-" \n"
-" out[gidx + gidy * get_global_size(0)] = (float4) \n"
-" (gradient[0], gradient[1], gradient[2], gradient[3]); \n"
+" float4 pix_fl = in[(i - 1) + (j - 1)*src_width]; \n"
+" float4 pix_fm = in[(i - 0) + (j - 1)*src_width]; \n"
+" float4 pix_fr = in[(i + 1) + (j - 1)*src_width]; \n"
+" float4 pix_ml = in[(i - 1) + (j - 0)*src_width]; \n"
+" float4 pix_mm = in[(i - 0) + (j - 0)*src_width]; \n"
+" float4 pix_mr = in[(i + 1) + (j - 0)*src_width]; \n"
+" float4 pix_bl = in[(i - 1) + (j + 1)*src_width]; \n"
+" float4 pix_bm = in[(i - 0) + (j + 1)*src_width]; \n"
+" float4 pix_br = in[(i + 1) + (j + 1)*src_width]; \n"
+" \n"
+" float4 minval, maxval; \n"
+" minmax(pix_fm, pix_bm, pix_ml, pix_mr, \n"
+" pix_mm, &minval, &maxval); \n"
+" float4 gradient = fmax((maxval - pix_mm), (pix_mm - minval)) \n"
+" * select((float4)0.5f, (float4)-0.5f, \n"
+" (pix_fl + pix_fm + pix_fr + \n"
+" pix_bm - 8.0f * pix_mm + pix_br + \n"
+" pix_ml + pix_mr + pix_bl) < EPSILON); \n"
+" gradient.w = pix_mm.w; \n"
+" \n"
+" out[gidx + gidy * get_global_size(0)] = gradient; \n"
"} \n"
" \n"
-"kernel void knl_edgelaplace (global float4 *in, \n"
+"kernel void knl_edgelaplace (const global float4 *in, \n"
" global float4 *out) \n"
"{ \n"
" int gidx = get_global_id(0); \n"
" int gidy = get_global_id(1); \n"
" \n"
-" int src_width = get_global_size(0) + 2; \n"
-" int src_height = get_global_size(1) + 2; \n"
+" int src_width = get_global_size(0) + LAPLACE_RADIUS; \n"
+" int src_height = get_global_size(1) + LAPLACE_RADIUS; \n"
" \n"
" int i = gidx + 1, j = gidy + 1; \n"
" \n"
-" float4 cur_pix; \n"
-" \n"
-" cur_pix = get_pix(in, i - 1, j - 1, src_width); \n"
-" float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 0, j - 1, src_width); \n"
-" float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i + 1, j - 1, src_width); \n"
-" float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 1, j - 0, src_width); \n"
-" float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 0, j - 0, src_width); \n"
-" float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i + 1, j - 0, src_width); \n"
-" float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 1, j + 1, src_width); \n"
-" float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i - 0, j + 1, src_width); \n"
-" float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" cur_pix = get_pix(in, i + 1, j + 1, src_width); \n"
-" float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w}; \n"
-" \n"
-" int c; \n"
-" float value[4]; \n"
-" \n"
-" for (c = 0;c < 3; ++c) \n"
-" { \n"
-" float current = pix_mm[c]; \n"
-" current = \n"
-" ((current > 0.0f) && \n"
-" (pix_fl[c] < 0.0f || pix_fm[c] < 0.0f || \n"
-" pix_fr[c] < 0.0f || pix_ml[c] < 0.0f || \n"
-" pix_mr[c] < 0.0f || pix_bl[c] < 0.0f || \n"
-" pix_bm[c] < 0.0f || pix_br[c] < 0.0f ) \n"
-" ) ? current : 0.0f; \n"
-" value[c] = current; \n"
-" } \n"
-" value[3] = pix_mm[3]; \n"
-" \n"
-" out[gidx + gidy * get_global_size(0)] = (float4) \n"
-" (value[0], value[1], value[2], value[3]); \n"
+" float4 pix_fl = in[(i - 1) + (j - 1)*src_width]; \n"
+" float4 pix_fm = in[(i - 0) + (j - 1)*src_width]; \n"
+" float4 pix_fr = in[(i + 1) + (j - 1)*src_width]; \n"
+" float4 pix_ml = in[(i - 1) + (j - 0)*src_width]; \n"
+" float4 pix_mm = in[(i - 0) + (j - 0)*src_width]; \n"
+" float4 pix_mr = in[(i + 1) + (j - 0)*src_width]; \n"
+" float4 pix_bl = in[(i - 1) + (j + 1)*src_width]; \n"
+" float4 pix_bm = in[(i - 0) + (j + 1)*src_width]; \n"
+" float4 pix_br = in[(i + 1) + (j + 1)*src_width]; \n"
+" \n"
+" float4 value = select(0.0f, pix_mm, (pix_mm > 0.0f) && \n"
+" (pix_fl < 0.0f || pix_fm < 0.0f || \n"
+" pix_fr < 0.0f || pix_ml < 0.0f || \n"
+" pix_mr < 0.0f || pix_bl < 0.0f || \n"
+" pix_bm < 0.0f || pix_br < 0.0f )); \n"
+" value.w = pix_mm.w; \n"
+" \n"
+" out[gidx + gidy * get_global_size(0)] = value; \n"
"} \n"
;