summaryrefslogtreecommitdiff
path: root/opencl
diff options
context:
space:
mode:
author: Carlos Zubieta <czubieta.dev@gmail.com> 2013-09-23 00:39:50 -0500
committer: Téo Mazars <teo.mazars@ensimag.fr> 2013-10-31 11:41:44 +0100
commit: cab389d42b1067d2c518ce1baa9f3ec8c0d637c3 (patch)
tree: eda25c3c743e2dd9919a56cae5c65e1e25a5b8c7 /opencl
parent: 3beaae431979dcabc1aa4bea0a35e00b67115bc9 (diff)
Operations: Add OpenCL support to stretch-contrast
Diffstat (limited to 'opencl')
-rw-r--r-- opencl/stretch-contrast.cl 136
-rw-r--r-- opencl/stretch-contrast.cl.h 138
2 files changed, 274 insertions, 0 deletions
diff --git a/opencl/stretch-contrast.cl b/opencl/stretch-contrast.cl
new file mode 100644
index 00000000..83f7ff9d
--- /dev/null
+++ b/opencl/stretch-contrast.cl
@@ -0,0 +1,136 @@
+/* This file is an image processing operation for GEGL
+ *
+ * GEGL is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * GEGL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright 2013 Carlos Zubieta <czubieta.dev@gmail.com>
+ */
+
+/* Reset the min/max buffers: every work-item stores FLT_MAX in its out_min slot and -FLT_MAX in its out_max slot. */
+__kernel void init_stretch (__global float *out_min, /* out: slot [global id] is set to FLT_MAX */
+ __global float *out_max) /* out: slot [global id] is set to -FLT_MAX */
+{
+ int gid = get_global_id (0); /* NOTE(review): no bounds check - assumes both buffers hold at least get_global_size(0) floats; confirm on the host side */
+
+ out_min[gid] = FLT_MAX;
+ out_max[gid] = -FLT_MAX;
+}
+/* Each work-group scans its share of the input and writes the min and max of the x/y/z components it saw to out_min/out_max[group id]. */
+__kernel void two_stages_local_min_max_reduce (__global const float4 *in, /* input, read as n_pixels float4 elements */
+ __global float *out_min, /* out: one minimum per work-group */
+ __global float *out_max, /* out: one maximum per work-group */
+ __local float *aux_min, /* scratch for minima, indexed by local id */
+ __local float *aux_max, /* scratch for maxima, indexed by local id */
+ int n_pixels) /* number of float4 elements to read from in */
+{
+ int gid = get_global_id(0);
+ int gsize = get_global_size(0);
+ int lid = get_local_id(0);
+ int lsize = get_local_size(0);
+ float4 min_v = (float4)( FLT_MAX);
+ float4 max_v = (float4)(-FLT_MAX);
+ float4 in_v;
+ float aux0, aux1;
+ int it;
+
+ /* Each work-item starts at its global id and steps by the global size, keeping a per-component running min/max */
+ for (it = gid; it < n_pixels; it += gsize)
+ {
+ in_v = in[it];
+ min_v = fmin (min_v, in_v);
+ max_v = fmax (max_v, in_v);
+ }
+
+ /* Collapse x, y and z into one scalar per work-item (the w component is left out), then reduce across the work-group */
+ aux_min[lid] = min (min (min_v.x, min_v.y), min_v.z);
+ aux_max[lid] = max (max (max_v.x, max_v.y), max_v.z);
+
+ barrier (CLK_LOCAL_MEM_FENCE); /* every scratch slot is written before any slot is read below */
+
+ for(it = lsize / 2; it > 0; it >>= 1) /* halving steps; NOTE(review): some slots are skipped when lsize is not a power of two - confirm the host always uses one */
+ {
+ if (lid < it)
+ {
+ aux0 = aux_min[lid + it];
+ aux1 = aux_min[lid];
+ aux_min[lid] = fmin (aux0, aux1);
+
+ aux0 = aux_max[lid + it];
+ aux1 = aux_max[lid];
+ aux_max[lid] = fmax (aux0, aux1);
+ }
+ barrier (CLK_LOCAL_MEM_FENCE); /* placed outside the if so that every work-item of the group reaches it */
+ }
+ if (lid == 0) /* after the loop, slot 0 holds the result of this work-group */
+ {
+ out_min[get_group_id(0)] = aux_min[0];
+ out_max[get_group_id(0)] = aux_max[0];
+ }
+
+ /* The output buffers have one entry per work-item of a work-group (lsize). Entries at or
+ * past the number of work-groups get no group result, so work-item 0 resets them here */
+ if (gid == 0)
+ {
+ /* No remainder handling: relies on gsize being a multiple of lsize */
+ int nb_wg = gsize / lsize;
+ for (it = nb_wg; it < lsize; it++)
+ {
+ out_min[it] = FLT_MAX;
+ out_max[it] = -FLT_MAX;
+ }
+ }
+}
+/* Reduce the first get_local_size(0) entries of in_min/in_max in place; store the minimum in out_min_max[0] and the maximum in out_min_max[1]. */
+__kernel void global_min_max_reduce (__global float *in_min, /* in/out: candidate minima, overwritten while reducing */
+ __global float *in_max, /* in/out: candidate maxima, overwritten while reducing */
+ __global float *out_min_max) /* out: [0] receives the minimum, [1] the maximum */
+{
+ int gid = get_global_id(0);
+ int lid = get_local_id(0);
+ int lsize = get_local_size(0);
+ float aux0, aux1;
+ int it;
+
+ /* Perform parallel reduction. NOTE(review): reads are indexed by lid but writes by gid; the two agree only when the global id equals the local id, so this looks meant for a single work-group launch - confirm */
+ for (it = lsize / 2; it > 0; it >>= 1)
+ {
+ if (lid < it)
+ {
+ aux0 = in_min[lid + it];
+ aux1 = in_min[lid];
+ in_min[gid] = fmin (aux0, aux1);
+
+ aux0 = in_max[lid + it];
+ aux1 = in_max[lid];
+ in_max[gid] = fmax (aux0, aux1);
+ }
+ barrier (CLK_GLOBAL_MEM_FENCE); /* the buffers live in global memory, hence the global fence between rounds */
+ }
+ if (lid == 0) /* NOTE(review): with more than one work-group, each group's first work-item would write out_min_max - confirm this cannot happen */
+ {
+ out_min_max[0] = in_min[gid];
+ out_min_max[1] = in_max[gid];
+ }
+}
+/* Per pixel: replace the x/y/z components with (value - min) / diff and copy the w component through unchanged. */
+__kernel void cl_stretch_contrast (__global const float4 *in, /* input pixels, one float4 per work-item */
+ __global float4 *out, /* output pixels, written at the same index as read */
+ float min, /* value subtracted from x, y and z */
+ float diff) /* divisor; NOTE(review): not checked for 0 here - presumably max - min, guarded by the host; confirm */
+{
+ int gid = get_global_id(0);
+ float4 in_v = in[gid];
+
+ in_v.xyz = (in_v.xyz - min) / diff;
+ out[gid] = in_v;
+}
diff --git a/opencl/stretch-contrast.cl.h b/opencl/stretch-contrast.cl.h
new file mode 100644
index 00000000..c4bd39ee
--- /dev/null
+++ b/opencl/stretch-contrast.cl.h
@@ -0,0 +1,138 @@
+static const char* stretch_contrast_cl_source = /* text of opencl/stretch-contrast.cl, one string literal per source line; keep the two files in sync. NOTE(review): presumably handed to the OpenCL runtime compiler by the host code - confirm */
+"/* This file is an image processing operation for GEGL \n"
+" * \n"
+" * GEGL is free software; you can redistribute it and/or \n"
+" * modify it under the terms of the GNU Lesser General Public \n"
+" * License as published by the Free Software Foundation; either \n"
+" * version 3 of the License, or (at your option) any later version. \n"
+" * \n"
+" * GEGL is distributed in the hope that it will be useful, \n"
+" * but WITHOUT ANY WARRANTY; without even the implied warranty of \n"
+" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU \n"
+" * Lesser General Public License for more details. \n"
+" * \n"
+" * You should have received a copy of the GNU Lesser General Public \n"
+" * License along with GEGL; if not, see <http://www.gnu.org/licenses/>. \n"
+" * \n"
+" * Copyright 2013 Carlos Zubieta <czubieta.dev@gmail.com> \n"
+" */ \n"
+" \n"
+" \n"
+"__kernel void init_stretch (__global float *out_min, \n"
+" __global float *out_max) \n"
+"{ \n"
+" int gid = get_global_id (0); \n"
+" \n"
+" out_min[gid] = FLT_MAX; \n"
+" out_max[gid] = -FLT_MAX; \n"
+"} \n"
+" \n"
+"__kernel void two_stages_local_min_max_reduce (__global const float4 *in, \n"
+" __global float *out_min,\n"
+" __global float *out_max,\n"
+" __local float *aux_min,\n"
+" __local float *aux_max,\n"
+" int n_pixels)\n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" int gsize = get_global_size(0); \n"
+" int lid = get_local_id(0); \n"
+" int lsize = get_local_size(0); \n"
+" float4 min_v = (float4)( FLT_MAX); \n"
+" float4 max_v = (float4)(-FLT_MAX); \n"
+" float4 in_v; \n"
+" float aux0, aux1; \n"
+" int it; \n"
+" \n"
+" /* Loop sequentially over chunks of input vector */ \n"
+" for (it = gid; it < n_pixels; it += gsize) \n"
+" { \n"
+" in_v = in[it]; \n"
+" min_v = fmin (min_v, in_v); \n"
+" max_v = fmax (max_v, in_v); \n"
+" } \n"
+" \n"
+" /* Perform parallel reduction */ \n"
+" aux_min[lid] = min (min (min_v.x, min_v.y), min_v.z); \n"
+" aux_max[lid] = max (max (max_v.x, max_v.y), max_v.z); \n"
+" \n"
+" barrier (CLK_LOCAL_MEM_FENCE); \n"
+" \n"
+" for(it = lsize / 2; it > 0; it >>= 1) \n"
+" { \n"
+" if (lid < it) \n"
+" { \n"
+" aux0 = aux_min[lid + it]; \n"
+" aux1 = aux_min[lid]; \n"
+" aux_min[lid] = fmin (aux0, aux1); \n"
+" \n"
+" aux0 = aux_max[lid + it]; \n"
+" aux1 = aux_max[lid]; \n"
+" aux_max[lid] = fmax (aux0, aux1); \n"
+" } \n"
+" barrier (CLK_LOCAL_MEM_FENCE); \n"
+" } \n"
+" if (lid == 0) \n"
+" { \n"
+" out_min[get_group_id(0)] = aux_min[0]; \n"
+" out_max[get_group_id(0)] = aux_max[0]; \n"
+" } \n"
+" \n"
+" /* the work-group size is the size of the buffer. \n"
+" * Make sure it's fully initialized */ \n"
+" if (gid == 0) \n"
+" { \n"
+" /* No special case handling, gsize is a multiple of lsize */ \n"
+" int nb_wg = gsize / lsize; \n"
+" for (it = nb_wg; it < lsize; it++) \n"
+" { \n"
+" out_min[it] = FLT_MAX; \n"
+" out_max[it] = -FLT_MAX; \n"
+" } \n"
+" } \n"
+"} \n"
+" \n"
+"__kernel void global_min_max_reduce (__global float *in_min, \n"
+" __global float *in_max, \n"
+" __global float *out_min_max) \n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" int lid = get_local_id(0); \n"
+" int lsize = get_local_size(0); \n"
+" float aux0, aux1; \n"
+" int it; \n"
+" \n"
+" /* Perform parallel reduction */ \n"
+" for (it = lsize / 2; it > 0; it >>= 1) \n"
+" { \n"
+" if (lid < it) \n"
+" { \n"
+" aux0 = in_min[lid + it]; \n"
+" aux1 = in_min[lid]; \n"
+" in_min[gid] = fmin (aux0, aux1); \n"
+" \n"
+" aux0 = in_max[lid + it]; \n"
+" aux1 = in_max[lid]; \n"
+" in_max[gid] = fmax (aux0, aux1); \n"
+" } \n"
+" barrier (CLK_GLOBAL_MEM_FENCE); \n"
+" } \n"
+" if (lid == 0) \n"
+" { \n"
+" out_min_max[0] = in_min[gid]; \n"
+" out_min_max[1] = in_max[gid]; \n"
+" } \n"
+"} \n"
+" \n"
+"__kernel void cl_stretch_contrast (__global const float4 *in, \n"
+" __global float4 *out, \n"
+" float min, \n"
+" float diff) \n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
+" \n"
+" in_v.xyz = (in_v.xyz - min) / diff; \n"
+" out[gid] = in_v; \n"
+"} \n"
+;