summaryrefslogtreecommitdiff
path: root/opencl/snn-mean.cl
blob: 8270e56b8aa211715bbfdb506edce98e33e37896 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*
 * Squared distance between two pixels.
 *
 * Only the first three components (.x, .y, .z) contribute; the fourth
 * component (.w) is ignored.  The result is the sum of the squared
 * per-component differences (no square root is taken).
 */
float colordiff (float4 pixA,
                 float4 pixB)
{
    float4 delta   = pixA - pixB;
    float4 squared = delta * delta;

    return squared.x + squared.y + squared.z;
}

/*
 * snn_mean: symmetric-nearest-neighbour mean for one output pixel.
 *
 * Each work-item handles the pixel at (get_global_id(0), get_global_id(1))
 * and writes one float4 to dst_buf at
 *     get_global_id(1) * get_global_size(0) + get_global_id(0).
 *
 * For every visited offset around the centre, the mirrored candidate
 * positions are compared against the centre pixel with colordiff() and the
 * closest one (or the centre itself if no in-bounds candidate has a
 * difference below 1000) is added to the running sum.  The output is the
 * sum divided by the number of visited offsets.
 *
 *   src_buf    - source pixels, indexed as x + y * src_width
 *   src_width  - row stride / width of src_buf in pixels
 *   src_height - height of src_buf in pixels
 *   dst_buf    - destination pixels, one per work-item
 *   radius     - neighbourhood radius in pixels (assumed >= 0)
 *   pairs      - 2: pick the best of four mirrored candidates per offset
 *                1: pick the best of two point-mirrored candidates
 *                any other value: nothing is written to dst_buf
 *
 * NOTE(review): the centre pixel is read at (gidx + radius, gidy + radius),
 * so src_buf presumably carries a border of `radius` pixels around the
 * output area -- confirm against the host-side buffer setup.
 */
__kernel void snn_mean (__global const   float4 *src_buf,
                                         int src_width,
                                         int src_height,
                        __global         float4 *dst_buf,
                                         int radius,
                                         int pairs)
{
    int gidx   = get_global_id(0);
    int gidy   = get_global_id(1);
    int offset = gidy * get_global_size(0) + gidx;

    __global const float4 *center_pix =
        src_buf + ((radius + gidx) + (gidy + radius) * src_width);

    float4 accumulated = 0;
    int    count       = 0;

    if (pairs == 2)
    {
        /* Both loops stop before 0, so offsets on the centre row/column
         * are never visited in this mode. */
        for (int i = -radius; i < 0; i++)
        {
            for (int j = -radius; j < 0; j++)
            {
                __global const float4 *selected_pix = center_pix;
                float  best_diff = 1000.0f;

                /* The four positions mirrored across both axes. */
                int xs[4] = {
                    gidx + j + radius, gidx - j + radius,
                    gidx - j + radius, gidx + j + radius
                };
                int ys[4] = {
                    gidy + i + radius, gidy - i + radius,
                    gidy + i + radius, gidy - i + radius
                };

                for (int k = 0; k < 4; k++)
                {
                    if (xs[k] >= 0 && xs[k] < src_width &&
                        ys[k] >= 0 && ys[k] < src_height)
                    {
                        __global const float4 *tpix =
                            src_buf + (xs[k] + ys[k] * src_width);
                        float diff = colordiff (*tpix, *center_pix);
                        if (diff < best_diff)
                        {
                            best_diff    = diff;
                            selected_pix = tpix;
                        }
                    }
                }

                accumulated += *selected_pix;
                ++count;
            }
        }
    }
    else if (pairs == 1)
    {
        for (int i = -radius; i <= 0; i++)
        {
            for (int j = -radius; j <= radius; j++)
            {
                __global const float4 *selected_pix = center_pix;
                float  best_diff = 1000.0f;

                /* Candidates are only compared when both offsets are
                 * non-zero; otherwise the centre pixel itself is used. */
                if (i != 0 && j != 0)
                {
                    /* The position and its point-mirror through the centre. */
                    int xs[2] = { gidx + i + radius, gidx - i + radius };
                    int ys[2] = { gidy + j + radius, gidy - j + radius };

                    /* The candidate index must be distinct from the outer
                     * loop variable `i`: sharing it would overwrite the
                     * current offset and end the outer loop early. */
                    for (int k = 0; k < 2; k++)
                    {
                        if (xs[k] >= 0 && xs[k] < src_width &&
                            ys[k] >= 0 && ys[k] < src_height)
                        {
                            __global const float4 *tpix =
                                src_buf + (xs[k] + ys[k] * src_width);
                            float diff = colordiff (*tpix, *center_pix);
                            if (diff < best_diff)
                            {
                                best_diff    = diff;
                                selected_pix = tpix;
                            }
                        }
                    }
                }

                accumulated += *selected_pix;
                ++count;

                /* Stop at the centre so the second half of the last
                 * line (already covered by mirroring) is not visited. */
                if (i == 0 && j == 0)
                    break;
            }
        }
    }
    else
    {
        /* Unsupported mode: leave dst_buf untouched. */
        return;
    }

    if (count > 0)
    {
        dst_buf[offset] = accumulated / (float4)(count);
    }
    else
    {
        /* No offsets were visited (pairs == 2 with radius == 0); pass the
         * centre pixel through instead of dividing by zero. */
        dst_buf[offset] = *center_pix;
    }
}