summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRPG <roshin@scriptumplus.ru>2012-12-29 20:08:31 -0200
committerVictor Oliveira <victormatheus@gmail.com>2012-12-29 20:23:03 -0200
commit295863957b16472de192638b2bf3ea487edd2f36 (patch)
tree57240b2ddf8dbbf98dc4eed3ddbf7a7ee5dffc85
parent4bf52f7b7703841a4798d0a355342ba1b60880fe (diff)
Performance improvements in oilfy and opencl support.
-rw-r--r--operations/common/oilify.c458
1 files changed, 365 insertions, 93 deletions
diff --git a/operations/common/oilify.c b/operations/common/oilify.c
index e5cac3c9..305cfb48 100644
--- a/operations/common/oilify.c
+++ b/operations/common/oilify.c
@@ -27,9 +27,12 @@
gegl_chant_int (mask_radius, _("Mask Radius"), 1, 25, 4,
_("Radius of circle around pixel"))
-gegl_chant_double (exponent, _("Exponent"), 1.0, 20.0, 8.0,
+gegl_chant_int (exponent, _("Exponent"), 1, 20, 8,
_("Exponent"))
+gegl_chant_int (intensities, _("Number of intensities"), 8, 256, 128,
+ _("Histogram size"))
+
gegl_chant_boolean (use_inten, _("Intensity Mode"), TRUE,
_("Use pixel luminance values"))
@@ -58,7 +61,6 @@ get_pixel (gint x,
dst[b] = src[b];
}
}
-
static void
get_pixel_inten (gint x,
gint y,
@@ -68,45 +70,35 @@ get_pixel_inten (gint x,
{
*dst = *(inten_begin + (x + buf_width*y));
}
-
static void
-oilify_pixel (gint x,
- gint y,
- GeglRectangle *whole_rect,
- gboolean use_inten,
- gdouble radius,
- gdouble exponent,
- gint buf_width,
- gfloat *src_buf,
- gfloat *inten_buf,
- gfloat *dst_pixel)
+oilify_pixel_inten (gint x,
+ gint y,
+ gdouble radius,
+ gint exponent,
+ gint intensities,
+ gint buf_width,
+ gfloat *src_buf,
+ gfloat *inten_buf,
+ gfloat *dst_pixel)
{
- gint hist[4][NUM_INTENSITIES];
gfloat cumulative_rgb[4][NUM_INTENSITIES];
gint hist_inten[NUM_INTENSITIES];
- gfloat mult_inten[NUM_INTENSITIES];
+ gfloat mult_inten;
gfloat temp_pixel[4];
- gfloat temp_inten_pixel;
gint ceil_radius = ceil (radius);
gdouble radius_sq = radius*radius;
gint i, j, b;
- gint hist_max[4];
gint inten_max;
gint intensity;
- gfloat sum;
- gfloat ratio;
+ gfloat ratio, temp_inten_pixel;
gfloat weight;
- gfloat color;
- gfloat result;
+ gfloat color[4];
gfloat div;
- for (i = 0; i < NUM_INTENSITIES; i++)
+ for (i = 0; i < intensities; i++)
{
hist_inten[i] = 0;
for (b = 0; b < 4; b++)
- {
- hist[b][i] = 0;
cumulative_rgb[b][i] = 0.0;
- }
}
/* calculate histograms */
@@ -121,28 +113,16 @@ oilify_pixel (gint x,
buf_width,
src_buf,
temp_pixel);
-
- if (use_inten)
- {
- get_pixel_inten (x + i,
- y + j,
- buf_width,
- inten_buf,
- &temp_inten_pixel);
- intensity = temp_inten_pixel * NUM_INTENSITIES;
- hist_inten[intensity]++;
- for (b = 0; b < 4; b++)
- {
- cumulative_rgb[b][intensity] += temp_pixel[b];
- }
- }
- else
+ get_pixel_inten (x + i,
+ y + j,
+ buf_width,
+ inten_buf,
+ &temp_inten_pixel);
+ intensity = temp_inten_pixel * (intensities - 1);
+ hist_inten[intensity]++;
+ for (b = 0; b < 4; b++)
{
- for (b = 0; b < 4; b++)
- {
- intensity = temp_pixel[b] * NUM_INTENSITIES;
- hist[b][intensity]++;
- }
+ cumulative_rgb[b][intensity] += temp_pixel[b];
}
}
}
@@ -151,57 +131,128 @@ oilify_pixel (gint x,
inten_max = 1;
/* calculated maximums */
- for (i = 0; i < NUM_INTENSITIES; i++) {
+ for (i = 0; i < intensities; i++) {
inten_max = MAX (inten_max, hist_inten[i]);
}
+ /* calculate weight and use it to set the pixel */
+ div = 0.0;
+
for (b = 0; b < 4; b++)
+ color[b] = 0.0;
+ for (i = 0; i < intensities; i++)
{
- hist_max[b] = 1;
- for (i = 0; i < NUM_INTENSITIES; i++) {
- hist_max[b] = MAX (hist_max[b], hist[b][i]);
+ if (hist_inten[i] > 0)
+ {
+ ratio = (gfloat) hist_inten[i] / (gfloat) inten_max;
+
+ /* using this instead of pow function gives HUGE performance improvement
+ but we cannot use floating point exponent... */
+ weight = 1.;
+ for(j = 0; j < exponent; j++)
+ weight *= ratio;
+ /* weight = powf(ratio, exponent); */
+ mult_inten = weight / (gfloat) hist_inten[i];
+
+ div += weight;
+ for (b = 0; b < 4; b++)
+ color[b] += mult_inten * cumulative_rgb[b][i];
}
}
+ for (b = 0; b < 4; b++)
+ dst_pixel[b] = color[b]/div;
+}
- /* calculate weight and use it to set the pixel */
- div = 0.0;
- for (i = 0; i < NUM_INTENSITIES; i++)
+static void
+oilify_pixel (gint x,
+ gint y,
+ gdouble radius,
+ gint exponent,
+ gint intensities,
+ gint buf_width,
+ gfloat *src_buf,
+ gfloat *dst_pixel)
+{
+ gint hist[4][NUM_INTENSITIES];
+ gfloat temp_pixel[4];
+ gint ceil_radius = ceil (radius);
+ gdouble radius_sq = radius*radius;
+ gint i, j, b;
+ gint hist_max[4];
+ gint intensity;
+ gfloat sum[4];
+ gfloat ratio;
+ gfloat weight;
+ gfloat result[4];
+ gfloat div[4];
+ for (i = 0; i < intensities; i++)
{
- if (use_inten && hist_inten[i] > 0)
+ for (b = 0; b < 4; b++)
{
- ratio = (gfloat) hist_inten[i] / (gfloat) inten_max;
- weight = pow (ratio, exponent);
- mult_inten[i] = weight / (gfloat) hist_inten[i];
- div += weight;
+ hist[b][i] = 0;
}
}
- for (b = 0; b < 4; b++)
+
+ /* calculate histograms */
+ for (i = -ceil_radius; i <= ceil_radius; i++)
{
- sum = 0.0;
- color = 0.0;
- if (use_inten)
- {
- for (i = 0; i < NUM_INTENSITIES; i++)
- {
- if (hist_inten[i] > 0)
- color += mult_inten[i] * cumulative_rgb[b][i];
- }
- dst_pixel[b] = color/div;
- }
- else
+ for (j = -ceil_radius; j <= ceil_radius; j++)
{
- div = 0.0;
- for (i = 0; i < NUM_INTENSITIES; i++)
+ if (i*i + j*j <= radius_sq)
{
- ratio = (gfloat) hist[b][i] / (gfloat) hist_max[b];
- weight = pow (ratio, exponent);
- sum += weight * (gfloat) i;
- div += weight;
+ get_pixel (x + i,
+ y + j,
+ buf_width,
+ src_buf,
+ temp_pixel);
+ for (b = 0; b < 4; b++)
+ {
+ intensity = temp_pixel[b] * (intensities - 1);
+ hist[b][intensity]++;
+ }
}
- result = sum / (gfloat) NUM_INTENSITIES;
- dst_pixel[b] = result/div;
}
}
+ for (b = 0; b < 4; b++)
+ hist_max[b] = 1;
+ for (i = 0; i < intensities; i++) {
+ for (b = 0; b < 4; b++)
+ if(hist_max[b] < hist[b][i]) /* MAX macros too slow here */
+ hist_max[b] = hist[b][i];
+ }
+
+ /* calculate weight and use it to set the pixel */
+
+ for (b = 0; b < 4; b++)
+ {
+ sum[b] = 0.0;
+ div[b] = 0.0;
+ }
+ for (i = 0; i < intensities; i++)
+ {
+ /* UNROLL this bottleneck loop, up to 50% faster */
+ #define DO_HIST_STEP(b) if(hist[b][i] > 0) \
+ { \
+ ratio = (gfloat) hist[b][i] / (gfloat) hist_max[b]; \
+ weight = 1.; \
+ for(j = 0; j < exponent; j++) \
+ weight *= ratio; \
+ sum[b] += weight * (gfloat) i; \
+ div[b] += weight; \
+ }
+
+ DO_HIST_STEP(0)
+ DO_HIST_STEP(1)
+ DO_HIST_STEP(2)
+ DO_HIST_STEP(3)
+ #undef DO_HIST_STEP
+ }
+
+ for (b = 0; b < 4; b++)
+ {
+ result[b] = sum[b] / (gfloat) (intensities - 1);
+ dst_pixel[b] = result[b]/div[b];
+ }
}
static void
@@ -214,9 +265,9 @@ prepare (GeglOperation *operation)
o = GEGL_CHANT_PROPERTIES (operation);
op_area->left =
- op_area->right =
- op_area->top =
- op_area->bottom = o->mask_radius;
+ op_area->right =
+ op_area->top =
+ op_area->bottom = o->mask_radius;
gegl_operation_set_format (operation, "input",
babl_format ("RGBA float"));
@@ -224,6 +275,218 @@ prepare (GeglOperation *operation)
babl_format ("RGBA float"));
}
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+
+/* two small different kernels are better than one big */
+static const char* kernel_source =
+"#define NUM_INTENSITIES 256 \n"
+"kernel void kernel_oilify(global float4 *in, \n"
+" global float4 *out, \n"
+" const int mask_radius, \n"
+" const int intensities, \n"
+" const float exponent) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" int x = gidx + mask_radius; \n"
+" int y = gidy + mask_radius; \n"
+" int dst_width = get_global_size(0); \n"
+" int src_width = dst_width + mask_radius * 2; \n"
+" float4 hist[NUM_INTENSITIES]; \n"
+" float4 hist_max = 1.0; \n"
+" int i, j, intensity; \n"
+" int radius_sq = mask_radius * mask_radius; \n"
+" float4 temp_pixel; \n"
+" for (i = 0; i < intensities; i++) \n"
+" hist[i] = 0.0; \n"
+" \n"
+" for (i = -mask_radius; i <= mask_radius; i++) \n"
+" { \n"
+" for (j = -mask_radius; j <= mask_radius; j++) \n"
+" { \n"
+" if (i*i + j*j <= radius_sq) \n"
+" { \n"
+" temp_pixel = in[x + i + (y + j) * src_width]; \n"
+" hist[(int)(temp_pixel.x * (intensities - 1))].x+=1; \n"
+" hist[(int)(temp_pixel.y * (intensities - 1))].y+=1; \n"
+" hist[(int)(temp_pixel.z * (intensities - 1))].z+=1; \n"
+" hist[(int)(temp_pixel.w * (intensities - 1))].w+=1; \n"
+" } \n"
+" } \n"
+" } \n"
+" \n"
+" for (i = 0; i < intensities; i++) { \n"
+" if(hist_max.x < hist[i].x) \n"
+" hist_max.x = hist[i].x; \n"
+" if(hist_max.y < hist[i].y) \n"
+" hist_max.y = hist[i].y; \n"
+" if(hist_max.z < hist[i].z) \n"
+" hist_max.z = hist[i].z; \n"
+" if(hist_max.w < hist[i].w) \n"
+" hist_max.w = hist[i].w; \n"
+" } \n"
+" float4 div = 0.0; \n"
+" float4 sum = 0.0; \n"
+" float4 ratio, weight; \n"
+" for (i = 0; i < intensities; i++) \n"
+" { \n"
+" ratio = hist[i] / hist_max; \n"
+" weight = pow(ratio, (float4)exponent); \n"
+" sum += weight * (float4)i; \n"
+" div += weight; \n"
+" } \n"
+" out[gidx + gidy * dst_width] = sum / div / (float)(intensities - 1); \n"
+"} \n"
+" \n"
+"kernel void kernel_oilify_inten(global float4 *in, \n"
+" global float4 *out, \n"
+" const int mask_radius, \n"
+" const int intensities, \n"
+" const float exponent) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" int x = gidx + mask_radius; \n"
+" int y = gidy + mask_radius; \n"
+" int dst_width = get_global_size(0); \n"
+" int src_width = dst_width + mask_radius * 2; \n"
+" float4 cumulative_rgb[NUM_INTENSITIES]; \n"
+" int hist_inten[NUM_INTENSITIES], inten_max; \n"
+" int i, j, intensity; \n"
+" int radius_sq = mask_radius * mask_radius; \n"
+" float4 temp_pixel; \n"
+" for (i = 0; i < intensities; i++) \n"
+" { \n"
+" hist_inten[i] = 0; \n"
+" cumulative_rgb[i] = 0.0; \n"
+" } \n"
+" for (i = -mask_radius; i <= mask_radius; i++) \n"
+" { \n"
+" for (j = -mask_radius; j <= mask_radius; j++) \n"
+" { \n"
+" if (i*i + j*j <= radius_sq) \n"
+" { \n"
+" temp_pixel = in[x + i + (y + j) * src_width]; \n"
+" /*Calculate intensity on the fly, GPU does it fast*/ \n"
+" intensity = (int)((0.299 * temp_pixel.x \n"
+" +0.587 * temp_pixel.y \n"
+" +0.114 * temp_pixel.z) * (float)(intensities-1)); \n"
+" hist_inten[intensity] += 1; \n"
+" cumulative_rgb[intensity] += temp_pixel; \n"
+" } \n"
+" } \n"
+" } \n"
+" inten_max = 1; \n"
+" \n"
+" /* calculated maximums */ \n"
+" for (i = 0; i < intensities; i++) { \n"
+" if(hist_inten[i] > inten_max) \n"
+" inten_max = hist_inten[i]; \n"
+" } \n"
+" float div = 0.0; \n"
+" float ratio, weight, mult_inten; \n"
+" \n"
+" float4 color = 0.0; \n"
+" for (i = 0; i < intensities; i++) \n"
+" { \n"
+" if (hist_inten[i] > 0) \n"
+" { \n"
+" ratio = (float)(hist_inten[i]) / (float)(inten_max); \n"
+" weight = pow(ratio, exponent); \n"
+" mult_inten = weight / (float)(hist_inten[i]); \n"
+" \n"
+" div += weight; \n"
+" color += mult_inten * cumulative_rgb[i]; \n"
+" } \n"
+" } \n"
+" out[gidx + gidy * dst_width] = color/div; \n"
+"} \n";
+
+
+static GeglClRunData *cl_data = NULL;
+
+static cl_int
+cl_oilify (cl_mem in_tex,
+ cl_mem out_tex,
+ size_t global_worksize,
+ const GeglRectangle *roi,
+ gint mask_radius,
+ gint number_of_intensities,
+ gint exponent,
+ gboolean use_inten)
+{
+ if (!cl_data)
+ {
+ const char *kernel_name[] = {"kernel_oilify", "kernel_oilify_inten", NULL};
+ cl_data = gegl_cl_compile_and_build(kernel_source, kernel_name);
+ }
+ if (!cl_data) return 0;
+
+ const size_t gbl_size[2] = {roi->width,roi->height};
+ cl_int radius = mask_radius;
+ cl_int intensities = number_of_intensities;
+ cl_float exp = (gfloat)exponent;
+ cl_int cl_err = 0;
+ gint arg = 0;
+
+ /* simple hack: select suitable kernel using boolean, 0 - no intensity mode, 1 - intensity mode */
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem), (void*)&in_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem), (void*)&out_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int), (void*)&radius);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int), (void*)&intensities);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_float), (void*)&exp);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(),
+ cl_data->kernel[use_inten], 2,
+ NULL, gbl_size, NULL,
+ 0, NULL, NULL);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ return CL_SUCCESS;
+}
+
+static gboolean
+cl_process (GeglOperation *operation,
+ GeglBuffer *input,
+ GeglBuffer *output,
+ const GeglRectangle *result)
+{
+ const Babl *in_format = gegl_operation_get_format (operation, "input");
+ const Babl *out_format = gegl_operation_get_format (operation, "output");
+ gint err;
+ gint j;
+ cl_int cl_err;
+
+ GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+ GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+
+ GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,result, out_format, GEGL_CL_BUFFER_WRITE, GEGL_ABYSS_NONE);
+ gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ,
+ o->mask_radius, o->mask_radius, o->mask_radius, o->mask_radius, GEGL_ABYSS_NONE);
+ while (gegl_buffer_cl_iterator_next (i, &err))
+ {
+ if (err) return FALSE;
+ for (j=0; j < i->n; j++)
+ {
+ cl_err = cl_oilify(i->tex[read][j],
+ i->tex[0][j],
+ i->size[0][j],&i->roi[0][j],
+ o->mask_radius,
+ o->intensities,
+ o->exponent,
+ o->use_inten);
+ if (cl_err != CL_SUCCESS)
+ {
+ g_warning("[OpenCL] Error in gegl:oilify: %s", gegl_cl_errstring(cl_err));
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
+}
+
static gboolean
process (GeglOperation *operation,
GeglBuffer *input,
@@ -234,7 +497,6 @@ process (GeglOperation *operation,
GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
- GeglRectangle* whole_rect;
gint x = o->mask_radius; /* initial x */
gint y = o->mask_radius; /* and y coordinates */
gfloat *src_buf;
@@ -245,7 +507,9 @@ process (GeglOperation *operation,
GeglRectangle src_rect;
gint total_pixels;
- whole_rect = gegl_operation_source_get_bounding_box (operation, "input");
+ if (gegl_cl_is_accelerated ())
+ if(cl_process(operation, input, output, result))
+ return TRUE;
src_rect.x = result->x - op_area->left;
src_rect.width = result->width + op_area->left + op_area->right;
@@ -255,21 +519,27 @@ process (GeglOperation *operation,
total_pixels = src_rect.width * src_rect.height;
src_buf = g_slice_alloc (4 * total_pixels * sizeof (gfloat));
- dst_buf = g_slice_alloc (4 * total_pixels * sizeof (gfloat));
- inten_buf = g_slice_alloc (total_pixels * sizeof (gfloat));
+ dst_buf = g_slice_alloc (4 * n_pixels * sizeof (gfloat));
+ if(o->use_inten)
+ inten_buf = g_slice_alloc (total_pixels * sizeof (gfloat));
gegl_buffer_get (input, &src_rect, 1.0, babl_format ("RGBA float"),
- src_buf, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE);
+ src_buf, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_CLAMP);
- gegl_buffer_get (input, &src_rect, 1.0, babl_format ("Y float"),
- inten_buf, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE);
+ if(o->use_inten)
+ gegl_buffer_get (input, &src_rect, 1.0, babl_format ("Y float"),
+ inten_buf, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_CLAMP);
out_pixel = dst_buf;
+
while (n_pixels--)
{
- oilify_pixel (x, y, whole_rect, o->use_inten, o->mask_radius, o->exponent,
+ if(o->use_inten)
+ oilify_pixel_inten (x, y, o->mask_radius, o->exponent, o->intensities,
src_rect.width, src_buf, inten_buf, out_pixel);
-
+ else
+ oilify_pixel (x, y, o->mask_radius, o->exponent, o->intensities,
+ src_rect.width, src_buf, out_pixel);
out_pixel += 4;
/* update x and y coordinates */
@@ -281,12 +551,14 @@ process (GeglOperation *operation,
}
}
+
gegl_buffer_set (output, result, 0,
babl_format ("RGBA float"),
dst_buf, GEGL_AUTO_ROWSTRIDE);
g_slice_free1 (4 * total_pixels * sizeof (gfloat), src_buf);
- g_slice_free1 (4 * total_pixels * sizeof (gfloat), dst_buf);
- g_slice_free1 (total_pixels * sizeof (gfloat), inten_buf);
+ g_slice_free1 (4 * n_pixels * sizeof (gfloat), dst_buf);
+ if(o->use_inten)
+ g_slice_free1 (total_pixels * sizeof (gfloat), inten_buf);
return TRUE;
}