diff options
author | Carlos Zubieta <czubieta.dev@gmail.com> | 2013-09-23 00:39:50 -0500 |
---|---|---|
committer | Téo Mazars <teo.mazars@ensimag.fr> | 2013-10-31 11:41:44 +0100 |
commit | cab389d42b1067d2c518ce1baa9f3ec8c0d637c3 (patch) | |
tree | eda25c3c743e2dd9919a56cae5c65e1e25a5b8c7 /opencl | |
parent | 3beaae431979dcabc1aa4bea0a35e00b67115bc9 (diff) |
Operations: Add OpenCL support to stretch-contrast
Diffstat (limited to 'opencl')
-rw-r--r-- | opencl/stretch-contrast.cl | 136 | ||||
-rw-r--r-- | opencl/stretch-contrast.cl.h | 138 |
2 files changed, 274 insertions, 0 deletions
diff --git a/opencl/stretch-contrast.cl b/opencl/stretch-contrast.cl
new file mode 100644
index 00000000..83f7ff9d
--- /dev/null
+++ b/opencl/stretch-contrast.cl
@@ -0,0 +1,136 @@
+/* This file is an image processing operation for GEGL
+ *
+ * GEGL is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * GEGL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright 2013 Carlos Zubieta <czubieta.dev@gmail.com>
+ */
+
+/* Set this work-item's slot of both result buffers to the identity values
+ * of the reductions: FLT_MAX for the minimum, -FLT_MAX for the maximum. */
+__kernel void init_stretch (__global float *out_min,
+                            __global float *out_max)
+{
+  int gid = get_global_id (0);
+
+  out_min[gid] =  FLT_MAX;
+  out_max[gid] = -FLT_MAX;
+}
+
+/* Stage 1: each work-group reduces a strided share of the n_pixels input
+ * pixels to the min and max of their x, y and z components (w is skipped)
+ * and stores them in out_min/out_max[group id].  The halving loop only
+ * covers every aux_min/aux_max slot if the local size is a power of two. */
+__kernel void two_stages_local_min_max_reduce (__global const float4 *in,
+                                               __global       float  *out_min,
+                                               __global       float  *out_max,
+                                               __local        float  *aux_min,
+                                               __local        float  *aux_max,
+                                                              int     n_pixels)
+{
+  int    gid   = get_global_id(0);
+  int    gsize = get_global_size(0);
+  int    lid   = get_local_id(0);
+  int    lsize = get_local_size(0);
+  float4 min_v = (float4)( FLT_MAX);
+  float4 max_v = (float4)(-FLT_MAX);
+  float4 in_v;
+  int    it;
+
+  /* Loop sequentially over chunks of input vector */
+  for (it = gid; it < n_pixels; it += gsize)
+    {
+      in_v  = in[it];
+      min_v = fmin (min_v, in_v);
+      max_v = fmax (max_v, in_v);
+    }
+
+  /* Collapse x, y, z into one value per work-item.  fmin/fmax are used since
+   * the common min/max builtins are undefined for infinite or NaN operands */
+  aux_min[lid] = fmin (fmin (min_v.x, min_v.y), min_v.z);
+  aux_max[lid] = fmax (fmax (max_v.x, max_v.y), max_v.z);
+
+  barrier (CLK_LOCAL_MEM_FENCE);
+  /* Perform parallel reduction: each pass folds the upper half onto the lower */
+  for (it = lsize / 2; it > 0; it >>= 1)
+    {
+      if (lid < it)
+        {
+          aux_min[lid] = fmin (aux_min[lid], aux_min[lid + it]);
+          aux_max[lid] = fmax (aux_max[lid], aux_max[lid + it]);
+        }
+      barrier (CLK_LOCAL_MEM_FENCE);
+    }
+  if (lid == 0)
+    {
+      out_min[get_group_id(0)] = aux_min[0];
+      out_max[get_group_id(0)] = aux_max[0];
+    }
+
+  /* The work-group size is the size of the buffer.
+   * Make sure it's fully initialized */
+  if (gid == 0)
+    {
+      /* No special case handling, gsize is a multiple of lsize */
+      int nb_wg = gsize / lsize;
+      for (it = nb_wg; it < lsize; it++)
+        {
+          out_min[it] =  FLT_MAX;
+          out_max[it] = -FLT_MAX;
+        }
+    }
+}
+
+/* Stage 2: fold the per-group results held in in_min/in_max, in place, into
+ * out_min_max[0] (min) and out_min_max[1] (max).  Launch as ONE work-group:
+ * the barrier below only synchronizes work-items of the same group. */
+__kernel void global_min_max_reduce (__global float *in_min,
+                                     __global float *in_max,
+                                     __global float *out_min_max)
+{
+  int lid   = get_local_id(0);
+  int lsize = get_local_size(0);
+  int it;
+
+  /* Perform parallel reduction; read and write the same lid-indexed slot */
+  for (it = lsize / 2; it > 0; it >>= 1)
+    {
+      if (lid < it)
+        {
+          in_min[lid] = fmin (in_min[lid], in_min[lid + it]);
+          in_max[lid] = fmax (in_max[lid], in_max[lid + it]);
+        }
+      barrier (CLK_GLOBAL_MEM_FENCE);
+    }
+  if (lid == 0)
+    {
+      out_min_max[0] = in_min[0];
+      out_min_max[1] = in_max[0];
+    }
+}
+
+/* Remap the x, y and z components of every pixel to (value - min) / diff;
+ * the w component is copied through unchanged.
+ * NOTE(review): diff == 0 yields inf/NaN here - presumably the host guards
+ * against a flat image; verify against the caller. */
+__kernel void cl_stretch_contrast (__global const float4 *in,
+                                   __global       float4 *out,
+                                                  float   min,
+                                                  float   diff)
+{
+  int    gid  = get_global_id(0);
+  float4 in_v = in[gid];
+
+  in_v.xyz = (in_v.xyz - min) / diff;
+  out[gid] = in_v;
+}
diff --git a/opencl/stretch-contrast.cl.h b/opencl/stretch-contrast.cl.h
new file mode 100644
index 00000000..c4bd39ee
--- /dev/null
+++ b/opencl/stretch-contrast.cl.h
@@ -0,0 +1,138 @@
+static const char* stretch_contrast_cl_source =
+"/* This file is an image processing operation for GEGL\n"
+" *\n"
+" * GEGL is free software; you can redistribute it and/or\n"
+" * modify it under the terms of the GNU Lesser General Public\n"
+" * License as published by the Free Software Foundation; either\n"
+" * version 3 of the License, or (at your option) any later version.\n"
+" *\n"
+" * GEGL is distributed in the hope that it will be useful,\n"
+" * but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
+" * Lesser General Public License for more details.\n"
+" *\n"
+" * You should have received a copy of the GNU Lesser General Public\n"
+" * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.\n"
+" *\n"
+" * Copyright 2013 Carlos Zubieta <czubieta.dev@gmail.com>\n"
+" */\n"
+"\n"
+"/* Set this work-item's slot of both result buffers to the identity values\n"
+" * of the reductions: FLT_MAX for the minimum, -FLT_MAX for the maximum. */\n"
+"__kernel void init_stretch (__global float *out_min,\n"
+"                            __global float *out_max)\n"
+"{\n"
+"  int gid = get_global_id (0);\n"
+"\n"
+"  out_min[gid] =  FLT_MAX;\n"
+"  out_max[gid] = -FLT_MAX;\n"
+"}\n"
+"\n"
+"/* Stage 1: each work-group reduces a strided share of the n_pixels input\n"
+" * pixels to the min and max of their x, y and z components (w is skipped)\n"
+" * and stores them in out_min/out_max[group id].  The halving loop only\n"
+" * covers every aux_min/aux_max slot if the local size is a power of two. */\n"
+"__kernel void two_stages_local_min_max_reduce (__global const float4 *in,\n"
+"                                               __global       float  *out_min,\n"
+"                                               __global       float  *out_max,\n"
+"                                               __local        float  *aux_min,\n"
+"                                               __local        float  *aux_max,\n"
+"                                                              int     n_pixels)\n"
+"{\n"
+"  int    gid   = get_global_id(0);\n"
+"  int    gsize = get_global_size(0);\n"
+"  int    lid   = get_local_id(0);\n"
+"  int    lsize = get_local_size(0);\n"
+"  float4 min_v = (float4)( FLT_MAX);\n"
+"  float4 max_v = (float4)(-FLT_MAX);\n"
+"  float4 in_v;\n"
+"  int    it;\n"
+"\n"
+"  /* Loop sequentially over chunks of input vector */\n"
+"  for (it = gid; it < n_pixels; it += gsize)\n"
+"    {\n"
+"      in_v  = in[it];\n"
+"      min_v = fmin (min_v, in_v);\n"
+"      max_v = fmax (max_v, in_v);\n"
+"    }\n"
+"\n"
+"  /* Collapse x, y, z into one value per work-item.  fmin/fmax are used since\n"
+"   * the common min/max builtins are undefined for infinite or NaN operands */\n"
+"  aux_min[lid] = fmin (fmin (min_v.x, min_v.y), min_v.z);\n"
+"  aux_max[lid] = fmax (fmax (max_v.x, max_v.y), max_v.z);\n"
+"\n"
+"  barrier (CLK_LOCAL_MEM_FENCE);\n"
+"  /* Perform parallel reduction: each pass folds the upper half onto the lower */\n"
+"  for (it = lsize / 2; it > 0; it >>= 1)\n"
+"    {\n"
+"      if (lid < it)\n"
+"        {\n"
+"          aux_min[lid] = fmin (aux_min[lid], aux_min[lid + it]);\n"
+"          aux_max[lid] = fmax (aux_max[lid], aux_max[lid + it]);\n"
+"        }\n"
+"      barrier (CLK_LOCAL_MEM_FENCE);\n"
+"    }\n"
+"  if (lid == 0)\n"
+"    {\n"
+"      out_min[get_group_id(0)] = aux_min[0];\n"
+"      out_max[get_group_id(0)] = aux_max[0];\n"
+"    }\n"
+"\n"
+"  /* The work-group size is the size of the buffer.\n"
+"   * Make sure it's fully initialized */\n"
+"  if (gid == 0)\n"
+"    {\n"
+"      /* No special case handling, gsize is a multiple of lsize */\n"
+"      int nb_wg = gsize / lsize;\n"
+"      for (it = nb_wg; it < lsize; it++)\n"
+"        {\n"
+"          out_min[it] =  FLT_MAX;\n"
+"          out_max[it] = -FLT_MAX;\n"
+"        }\n"
+"    }\n"
+"}\n"
+"\n"
+"/* Stage 2: fold the per-group results held in in_min/in_max, in place, into\n"
+" * out_min_max[0] (min) and out_min_max[1] (max).  Launch as ONE work-group:\n"
+" * the barrier below only synchronizes work-items of the same group. */\n"
+"__kernel void global_min_max_reduce (__global float *in_min,\n"
+"                                     __global float *in_max,\n"
+"                                     __global float *out_min_max)\n"
+"{\n"
+"  int lid   = get_local_id(0);\n"
+"  int lsize = get_local_size(0);\n"
+"  int it;\n"
+"\n"
+"  /* Perform parallel reduction; read and write the same lid-indexed slot */\n"
+"  for (it = lsize / 2; it > 0; it >>= 1)\n"
+"    {\n"
+"      if (lid < it)\n"
+"        {\n"
+"          in_min[lid] = fmin (in_min[lid], in_min[lid + it]);\n"
+"          in_max[lid] = fmax (in_max[lid], in_max[lid + it]);\n"
+"        }\n"
+"      barrier (CLK_GLOBAL_MEM_FENCE);\n"
+"    }\n"
+"  if (lid == 0)\n"
+"    {\n"
+"      out_min_max[0] = in_min[0];\n"
+"      out_min_max[1] = in_max[0];\n"
+"    }\n"
+"}\n"
+"\n"
+"/* Remap the x, y and z components of every pixel to (value - min) / diff;\n"
+" * the w component is copied through unchanged.\n"
+" * NOTE(review): diff == 0 yields inf/NaN here - presumably the host guards\n"
+" * against a flat image; verify against the caller. */\n"
+"__kernel void cl_stretch_contrast (__global const float4 *in,\n"
+"                                   __global       float4 *out,\n"
+"                                                  float   min,\n"
+"                                                  float   diff)\n"
+"{\n"
+"  int    gid  = get_global_id(0);\n"
+"  float4 in_v = in[gid];\n"
+"\n"
+"  in_v.xyz = (in_v.xyz - min) / diff;\n"
+"  out[gid] = in_v;\n"
+"}\n"
+;