1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
/* This file is an image processing operation for GEGL
*
* GEGL is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* GEGL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with GEGL; if not, see <http://www.gnu.org/licenses/>.
*
* Copyright 2013 Carlos Zubieta <czubieta.dev@gmail.com>
*/
/* init_stretch:
 * @out_min: buffer that will accumulate per-channel minima.
 * @out_max: buffer that will accumulate per-channel maxima.
 *
 * Seeds one slot of each buffer with the neutral element of its reduction:
 * +FLT_MAX in every channel for the minimum, -FLT_MAX for the maximum.
 * Each work-item writes the slot selected by its global id in dimension 0.
 */
__kernel void init_stretch (__global float4 *out_min,
                            __global float4 *out_max)
{
  const int slot = get_global_id (0);

  const float4 min_identity = (float4)( FLT_MAX);
  const float4 max_identity = (float4)(-FLT_MAX);

  out_min[slot] = min_identity;
  out_max[slot] = max_identity;
}
/* two_stages_local_min_max_reduce:
 * @in:       input pixels, read as float4.
 * @out_min:  receives one per-channel minimum per work-group.
 * @out_max:  receives one per-channel maximum per work-group.
 * @aux_min:  __local scratch, indexed by local id.
 * @aux_max:  __local scratch, indexed by local id.
 * @n_pixels: number of float4 elements of @in to visit.
 *
 * Stage 1: every work-item walks @in starting at its global id and stepping
 * by the global size, keeping a private running min and max.
 * Stage 2: the private results are combined inside the work-group through
 * local memory; the work-item with local id 0 stores the group's result at
 * index get_group_id(0) of @out_min / @out_max.
 *
 * NOTE(review): the halving loop only folds every slot when the local size
 * is a power of two -- presumably guaranteed by the host code; confirm.
 */
__kernel void two_stages_local_min_max_reduce (__global const float4 *in,
                                               __global       float4 *out_min,
                                               __global       float4 *out_max,
                                               __local        float4 *aux_min,
                                               __local        float4 *aux_max,
                                               int                    n_pixels)
{
  const int global_idx  = get_global_id (0);
  const int global_span = get_global_size (0);
  const int local_idx   = get_local_id (0);
  const int local_span  = get_local_size (0);

  float4 lowest  = (float4)( FLT_MAX);
  float4 highest = (float4)(-FLT_MAX);
  int    pos;
  int    stride;
  int    slot;

  /* Stage 1: private accumulation over this work-item's share of the input */
  pos = global_idx;
  while (pos < n_pixels)
    {
      const float4 pixel = in[pos];

      lowest  = min (lowest,  pixel);
      highest = max (highest, pixel);

      pos += global_span;
    }

  /* Publish the private results so the rest of the work-group can see them */
  aux_min[local_idx] = lowest;
  aux_max[local_idx] = highest;
  barrier (CLK_LOCAL_MEM_FENCE);

  /* Stage 2: fold the upper half of the active range onto the lower half,
   * halving the range each round.  The barrier sits outside the `if' so
   * that every work-item of the group reaches it. */
  for (stride = local_span / 2; stride > 0; stride /= 2)
    {
      if (local_idx < stride)
        {
          aux_min[local_idx] = min (aux_min[local_idx + stride],
                                    aux_min[local_idx]);
          aux_max[local_idx] = max (aux_max[local_idx + stride],
                                    aux_max[local_idx]);
        }

      barrier (CLK_LOCAL_MEM_FENCE);
    }

  /* One work-item per group hands the group's result to global memory */
  if (local_idx == 0)
    {
      const size_t group = get_group_id (0);

      out_min[group] = aux_min[0];
      out_max[group] = aux_max[0];
    }

  /* Per the original author's note, the output buffers are as large as one
   * work-group.  Only global_span / local_span groups produced a value, so
   * a single work-item pads the remaining slots with the neutral elements.
   * The global size is taken to be a multiple of the local size, hence the
   * plain integer division. */
  if (global_idx == 0)
    {
      const int n_groups = global_span / local_span;

      for (slot = n_groups; slot < local_span; slot++)
        {
          out_min[slot] = (float4)( FLT_MAX);
          out_max[slot] = (float4)(-FLT_MAX);
        }
    }
}
/* global_min_max_reduce:
 * @in_min:      per-channel minima to combine; overwritten in place.
 * @in_max:      per-channel maxima to combine; overwritten in place.
 * @out_min_max: element 0 receives the final minimum, element 1 the final
 *               maximum.
 *
 * Folds the first get_local_size(0) entries of @in_min and @in_max down to
 * one value each, working directly in global memory.
 *
 * Operands are read at local-id based indices while results are written at
 * the global id.  The two indices coincide only inside the first work-group.
 * NOTE(review): looks like this kernel is meant to be enqueued as a single
 * work-group (global size == local size, no offset) -- confirm against the
 * host code.  As in any halving reduction, a power-of-two local size is
 * presumably expected as well.
 */
__kernel void global_min_max_reduce (__global float4 *in_min,
                                     __global float4 *in_max,
                                     __global float4 *out_min_max)
{
  const int global_idx = get_global_id (0);
  const int local_idx  = get_local_id (0);
  const int local_span = get_local_size (0);
  int       stride     = local_span / 2;

  /* Halve the active range each round; the barrier is kept outside the
   * `if' so that every work-item of the group reaches it. */
  while (stride > 0)
    {
      if (local_idx < stride)
        {
          in_min[global_idx] = min (in_min[local_idx + stride],
                                    in_min[local_idx]);
          in_max[global_idx] = max (in_max[local_idx + stride],
                                    in_max[local_idx]);
        }

      barrier (CLK_GLOBAL_MEM_FENCE);

      stride >>= 1;
    }

  /* The work-item with local id 0 publishes the combined result */
  if (local_idx == 0)
    {
      out_min_max[0] = in_min[global_idx];
      out_min_max[1] = in_max[global_idx];
    }
}
/* cl_stretch_contrast:
 * @in:   input pixels, read as float4.
 * @out:  output pixels, written as float4.
 * @min:  per-channel value subtracted from every pixel.
 * @diff: per-channel divisor applied after the subtraction.
 *
 * Each work-item remaps the pixel at its global id, component-wise:
 *   out = (in - min) / diff
 *
 * Nothing here guards against zero components in @diff.
 * NOTE(review): presumably the caller ensures a non-zero range -- confirm.
 */
__kernel void cl_stretch_contrast (__global const float4 *in,
                                   __global       float4 *out,
                                   float4                 min,
                                   float4                 diff)
{
  const int px = get_global_id (0);

  out[px] = (in[px] - min) / diff;
}
|