Operations: Add OpenCL support to stretch-contrast

author: Carlos Zubieta <czubieta.dev@gmail.com> 2013-09-23 00:39:50 -0500
committer: Téo Mazars <teo.mazars@ensimag.fr> 2013-10-31 11:41:44 +0100
commit: cab389d42b1067d2c518ce1baa9f3ec8c0d637c3 (patch)
tree: eda25c3c743e2dd9919a56cae5c65e1e25a5b8c7 /opencl
parent: 3beaae431979dcabc1aa4bea0a35e00b67115bc9 (diff)
2 files changed, 274 insertions, 0 deletions
diff --git a/opencl/stretch-contrast.cl b/opencl/stretch-contrast.cl
new file mode 100644
index 00000000..83f7ff9d
--- /dev/null
+++ b/opencl/stretch-contrast.cl
@@ -0,0 +1,136 @@
+/* This file is an image processing operation for GEGL
+ *
+ * GEGL is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * GEGL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright 2013 Carlos Zubieta <czubieta.dev@gmail.com>
+ */
+
+
+__kernel void init_stretch (__global float *out_min,  /* set to  FLT_MAX at this work-item's index */
+                            __global float *out_max)  /* set to -FLT_MAX at this work-item's index */
+{
+  int gid = get_global_id (0);
+  /* Seed values: any finite sample is <= FLT_MAX and >= -FLT_MAX. */
+  out_min[gid] =  FLT_MAX;
+  out_max[gid] = -FLT_MAX;
+}
+
+__kernel void two_stages_local_min_max_reduce (__global const float4 *in,       /* input, read as float4 elements */
+                                               __global       float  *out_min,  /* one minimum per work-group, indexed by group id */
+                                               __global       float  *out_max,  /* one maximum per work-group, indexed by group id */
+                                               __local        float  *aux_min,  /* local scratch, indexed by local id */
+                                               __local        float  *aux_max,  /* local scratch, indexed by local id */
+                                                              int    n_pixels)  /* number of float4 elements to scan in `in` */
+{
+  int    gid   = get_global_id(0);
+  int    gsize = get_global_size(0);
+  int    lid   = get_local_id(0);
+  int    lsize = get_local_size(0);
+  float4 min_v = (float4)( FLT_MAX);
+  float4 max_v = (float4)(-FLT_MAX);
+  float4 in_v;
+  float  aux0, aux1;
+  int    it;
+  /* Stage 1: each work-item visits elements gid, gid + gsize, ... and keeps
+   * a component-wise running minimum and maximum of what it has seen. */
+  for (it = gid; it < n_pixels; it += gsize)
+    {
+      in_v  =  in[it];
+      min_v =  fmin (min_v, in_v);
+      max_v =  fmax (max_v, in_v);
+    }
+  /* Collapse the x, y and z components into one scalar per work-item;
+   * the w component does not contribute to the result. */
+  aux_min[lid] = min (min (min_v.x, min_v.y), min_v.z);
+  aux_max[lid] = max (max (max_v.x, max_v.y), max_v.z);
+  /* All scratch slots must be written before any of them is read below. */
+  barrier (CLK_LOCAL_MEM_FENCE);
+  /* Stage 2: pairwise reduction in local memory; slot 0 ends up with the group result. */
+  for(it = lsize / 2; it > 0; it >>= 1)  /* NOTE(review): skips slots unless lsize is a power of two */
+    {
+      if (lid < it)
+        {
+          aux0         = aux_min[lid + it];
+          aux1         = aux_min[lid];
+          aux_min[lid] = fmin (aux0, aux1);
+
+          aux0         = aux_max[lid + it];
+          aux1         = aux_max[lid];
+          aux_max[lid] = fmax (aux0, aux1);
+        }
+      barrier (CLK_LOCAL_MEM_FENCE);  /* reached by every work-item, not only the active ones */
+  }
+  if (lid == 0)  /* one work-item per group publishes the group's result */
+    {
+      out_min[get_group_id(0)] = aux_min[0];
+      out_max[get_group_id(0)] = aux_max[0];
+    }
+  /* Pad the tail of the output buffers: indices nb_wg .. lsize - 1 receive no
+   * group result, so work-item 0 fills them with the seed values.
+   * NOTE(review): assumes both buffers hold at least lsize floats - confirm on the host side. */
+  if (gid == 0)
+    {
+      /* Number of work-groups; no remainder because gsize is a multiple of lsize */
+      int nb_wg = gsize / lsize;
+      for (it = nb_wg; it < lsize; it++)
+        {
+          out_min[it] =  FLT_MAX;
+          out_max[it] = -FLT_MAX;
+        }
+    }
+}
+
+__kernel void global_min_max_reduce (__global float *in_min,       /* candidate minima; overwritten in place */
+                                     __global float *in_max,       /* candidate maxima; overwritten in place */
+                                     __global float *out_min_max)  /* [0] receives the minimum, [1] the maximum */
+{
+  int   gid   = get_global_id(0);
+  int   lid   = get_local_id(0);
+  int   lsize = get_local_size(0);
+  float aux0, aux1;
+  int   it;
+  /* In-place pairwise reduction in global memory: reads use lid, writes use gid.
+   * NOTE(review): the two indices agree only within a single work-group - confirm the launch size. */
+  for (it = lsize / 2; it > 0; it >>= 1)
+    {
+      if (lid < it)
+        {
+          aux0        = in_min[lid + it];
+          aux1        = in_min[lid];
+          in_min[gid] = fmin (aux0, aux1);
+
+          aux0        = in_max[lid + it];
+          aux1        = in_max[lid];
+          in_max[gid] = fmax (aux0, aux1);
+        }
+      barrier (CLK_GLOBAL_MEM_FENCE);  /* synchronises the work-items of one work-group only */
+  }
+  if (lid == 0)  /* local work-item 0 writes the final pair */
+    {
+      out_min_max[0] = in_min[gid];
+      out_min_max[1] = in_max[gid];
+    }
+}
+
+__kernel void cl_stretch_contrast (__global const float4 *in,    /* input float4 elements */
+                                   __global       float4 *out,   /* output, written at the same index as read */
+                                                  float   min,   /* subtracted from x, y and z */
+                                                  float   diff)  /* divisor applied after the subtraction */
+{
+  int    gid  = get_global_id(0);
+  float4 in_v = in[gid];
+  /* Rescale x, y and z; the w component is passed through unchanged. */
+  in_v.xyz = (in_v.xyz - min) / diff;  /* NOTE(review): no guard against diff == 0 in this kernel */
+  out[gid] = in_v;
+}
diff --git a/opencl/stretch-contrast.cl.h b/opencl/stretch-contrast.cl.h
new file mode 100644
index 00000000..c4bd39ee
--- /dev/null
+++ b/opencl/stretch-contrast.cl.h
@@ -0,0 +1,138 @@
+static const char* stretch_contrast_cl_source = /* OpenCL C program text as one string; mirrors opencl/stretch-contrast.cl (presumably generated from it - keep both in sync) */
+"/* This file is an image processing operation for GEGL                        \n"
+" *                                                                            \n"
+" * GEGL is free software; you can redistribute it and/or                      \n"
+" * modify it under the terms of the GNU Lesser General Public                 \n"
+" * License as published by the Free Software Foundation; either               \n"
+" * version 3 of the License, or (at your option) any later version.           \n"
+" *                                                                            \n"
+" * GEGL is distributed in the hope that it will be useful,                    \n"
+" * but WITHOUT ANY WARRANTY; without even the implied warranty of             \n"
+" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU          \n"
+" * Lesser General Public License for more details.                            \n"
+" *                                                                            \n"
+" * You should have received a copy of the GNU Lesser General Public           \n"
+" * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.       \n"
+" *                                                                            \n"
+" * Copyright 2013 Carlos Zubieta <czubieta.dev@gmail.com>                     \n"
+" */                                                                           \n"
+"                                                                              \n"
+"                                                                              \n"
+"__kernel void init_stretch (__global float *out_min,                          \n"
+"                            __global float *out_max)                          \n"
+"{                                                                             \n"
+"  int gid = get_global_id (0);                                                \n"
+"                                                                              \n"
+"  out_min[gid] =  FLT_MAX;                                                    \n"
+"  out_max[gid] = -FLT_MAX;                                                    \n"
+"}                                                                             \n"
+"                                                                              \n"
+"__kernel void two_stages_local_min_max_reduce (__global const float4 *in,     \n"
+"                                               __global       float  *out_min,\n"
+"                                               __global       float  *out_max,\n"
+"                                               __local        float  *aux_min,\n"
+"                                               __local        float  *aux_max,\n"
+"                                                              int    n_pixels)\n"
+"{                                                                             \n"
+"  int    gid   = get_global_id(0);                                            \n"
+"  int    gsize = get_global_size(0);                                          \n"
+"  int    lid   = get_local_id(0);                                             \n"
+"  int    lsize = get_local_size(0);                                           \n"
+"  float4 min_v = (float4)( FLT_MAX);                                          \n"
+"  float4 max_v = (float4)(-FLT_MAX);                                          \n"
+"  float4 in_v;                                                                \n"
+"  float  aux0, aux1;                                                          \n"
+"  int    it;                                                                  \n"
+"                                                                              \n"
+"  /* Loop sequentially over chunks of input vector */                         \n"
+"  for (it = gid; it < n_pixels; it += gsize)                                  \n"
+"    {                                                                         \n"
+"      in_v  =  in[it];                                                        \n"
+"      min_v =  fmin (min_v, in_v);                                            \n"
+"      max_v =  fmax (max_v, in_v);                                            \n"
+"    }                                                                         \n"
+"                                                                              \n"
+"  /* Perform parallel reduction */                                            \n"
+"  aux_min[lid] = min (min (min_v.x, min_v.y), min_v.z);                       \n"
+"  aux_max[lid] = max (max (max_v.x, max_v.y), max_v.z);                       \n"
+"                                                                              \n"
+"  barrier (CLK_LOCAL_MEM_FENCE);                                              \n"
+"                                                                              \n"
+"  for(it = lsize / 2; it > 0; it >>= 1)                                       \n"
+"    {                                                                         \n"
+"      if (lid < it)                                                           \n"
+"        {                                                                     \n"
+"          aux0         = aux_min[lid + it];                                   \n"
+"          aux1         = aux_min[lid];                                        \n"
+"          aux_min[lid] = fmin (aux0, aux1);                                   \n"
+"                                                                              \n"
+"          aux0         = aux_max[lid + it];                                   \n"
+"          aux1         = aux_max[lid];                                        \n"
+"          aux_max[lid] = fmax (aux0, aux1);                                   \n"
+"        }                                                                     \n"
+"      barrier (CLK_LOCAL_MEM_FENCE);                                          \n"
+"  }                                                                           \n"
+"  if (lid == 0)                                                               \n"
+"    {                                                                         \n"
+"      out_min[get_group_id(0)] = aux_min[0];                                  \n"
+"      out_max[get_group_id(0)] = aux_max[0];                                  \n"
+"    }                                                                         \n"
+"                                                                              \n"
+"  /* the work-group size is the size of the buffer.                           \n"
+"   * Make sure it's fully initialized */                                      \n"
+"  if (gid == 0)                                                               \n"
+"    {                                                                         \n"
+"      /* No special case handling, gsize is a multiple of lsize */            \n"
+"      int nb_wg = gsize / lsize;                                              \n"
+"      for (it = nb_wg; it < lsize; it++)                                      \n"
+"        {                                                                     \n"
+"          out_min[it] =  FLT_MAX;                                             \n"
+"          out_max[it] = -FLT_MAX;                                             \n"
+"        }                                                                     \n"
+"    }                                                                         \n"
+"}                                                                             \n"
+"                                                                              \n"
+"__kernel void global_min_max_reduce (__global float *in_min,                  \n"
+"                                     __global float *in_max,                  \n"
+"                                     __global float *out_min_max)             \n"
+"{                                                                             \n"
+"  int   gid   = get_global_id(0);                                             \n"
+"  int   lid   = get_local_id(0);                                              \n"
+"  int   lsize = get_local_size(0);                                            \n"
+"  float aux0, aux1;                                                           \n"
+"  int   it;                                                                   \n"
+"                                                                              \n"
+"  /* Perform parallel reduction */                                            \n"
+"  for (it = lsize / 2; it > 0; it >>= 1)                                      \n"
+"    {                                                                         \n"
+"      if (lid < it)                                                           \n"
+"        {                                                                     \n"
+"          aux0        = in_min[lid + it];                                     \n"
+"          aux1        = in_min[lid];                                          \n"
+"          in_min[gid] = fmin (aux0, aux1);                                    \n"
+"                                                                              \n"
+"          aux0        = in_max[lid + it];                                     \n"
+"          aux1        = in_max[lid];                                          \n"
+"          in_max[gid] = fmax (aux0, aux1);                                    \n"
+"        }                                                                     \n"
+"      barrier (CLK_GLOBAL_MEM_FENCE);                                         \n"
+"  }                                                                           \n"
+"  if (lid == 0)                                                               \n"
+"    {                                                                         \n"
+"      out_min_max[0] = in_min[gid];                                           \n"
+"      out_min_max[1] = in_max[gid];                                           \n"
+"    }                                                                         \n"
+"}                                                                             \n"
+"                                                                              \n"
+"__kernel void cl_stretch_contrast (__global const float4 *in,                 \n"
+"                                   __global       float4 *out,                \n"
+"                                                  float   min,                \n"
+"                                                  float   diff)               \n"
+"{                                                                             \n"
+"  int    gid  = get_global_id(0);                                             \n"
+"  float4 in_v = in[gid];                                                      \n"
+"                                                                              \n"
+"  in_v.xyz = (in_v.xyz - min) / diff;                                         \n"
+"  out[gid] = in_v;                                                            \n"
+"}                                                                             \n"
+;
author	Carlos Zubieta <czubieta.dev@gmail.com>	2013-09-23 00:39:50 -0500
committer	Téo Mazars <teo.mazars@ensimag.fr>	2013-10-31 11:41:44 +0100
commit	cab389d42b1067d2c518ce1baa9f3ec8c0d637c3 (patch)
tree	eda25c3c743e2dd9919a56cae5c65e1e25a5b8c7 /opencl
parent	3beaae431979dcabc1aa4bea0a35e00b67115bc9 (diff)