/* This file is an image processing operation for GEGL
*
* GEGL is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* GEGL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with GEGL; if not, see .
*
* Copyright 2013 Carlos Zubieta
*/
__kernel void init_stretch (__global float4 *out_min,
__global float4 *out_max)
{
int gid = get_global_id (0);
out_min[gid] = (float4)( FLT_MAX);
out_max[gid] = (float4)(-FLT_MAX);
}
__kernel void two_stages_local_min_max_reduce (__global const float4 *in,
__global float4 *out_min,
__global float4 *out_max,
__local float4 *aux_min,
__local float4 *aux_max,
int n_pixels)
{
int gid = get_global_id(0);
int gsize = get_global_size(0);
int lid = get_local_id(0);
int lsize = get_local_size(0);
float4 min_v = (float4)( FLT_MAX);
float4 max_v = (float4)(-FLT_MAX);
float4 in_v;
float4 aux0, aux1;
int it;
/* Loop sequentially over chunks of input vector */
for (it = gid; it < n_pixels; it += gsize)
{
in_v = in[it];
min_v = min (min_v, in_v);
max_v = max (max_v, in_v);
}
/* Perform parallel reduction */
aux_min[lid] = min_v;
aux_max[lid] = max_v;
barrier (CLK_LOCAL_MEM_FENCE);
for(it = lsize / 2; it > 0; it >>= 1)
{
if (lid < it)
{
aux0 = aux_min[lid + it];
aux1 = aux_min[lid];
aux_min[lid] = min (aux0, aux1);
aux0 = aux_max[lid + it];
aux1 = aux_max[lid];
aux_max[lid] = max (aux0, aux1);
}
barrier (CLK_LOCAL_MEM_FENCE);
}
if (lid == 0)
{
out_min[get_group_id(0)] = aux_min[0];
out_max[get_group_id(0)] = aux_max[0];
}
/* the work-group size is the size of the buffer.
* Make sure it's fully initialized */
if (gid == 0)
{
/* No special case handling, gsize is a multiple of lsize */
int nb_wg = gsize / lsize;
for (it = nb_wg; it < lsize; it++)
{
out_min[it] = (float4)( FLT_MAX);
out_max[it] = (float4)(-FLT_MAX);
}
}
}
__kernel void global_min_max_reduce (__global float4 *in_min,
__global float4 *in_max,
__global float4 *out_min_max)
{
int gid = get_global_id(0);
int lid = get_local_id(0);
int lsize = get_local_size(0);
float4 aux0, aux1;
int it;
/* Perform parallel reduction */
for (it = lsize / 2; it > 0; it >>= 1)
{
if (lid < it)
{
aux0 = in_min[lid + it];
aux1 = in_min[lid];
in_min[gid] = min (aux0, aux1);
aux0 = in_max[lid + it];
aux1 = in_max[lid];
in_max[gid] = max (aux0, aux1);
}
barrier (CLK_GLOBAL_MEM_FENCE);
}
if (lid == 0)
{
out_min_max[0] = in_min[gid];
out_min_max[1] = in_max[gid];
}
}
__kernel void cl_stretch_contrast (__global const float4 *in,
__global float4 *out,
float4 min,
float4 diff)
{
int gid = get_global_id(0);
float4 in_v = in[gid];
in_v = (in_v - min) / diff;
out[gid] = in_v;
}