summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSøren Sandmann Pedersen <ssp@redhat.com>2011-09-04 02:53:39 -0400
committerSøren Sandmann Pedersen <ssp@redhat.com>2011-09-21 18:55:25 -0400
commitad5c6bbb36c1c5e72313f7c7bc7c6e6b7e79daba (patch)
tree058ed035d51ba3875bde741ac5f5f52dbb49cbcf
parenteb2e7ed81b324af730c1a7639c9ca9ed60152875 (diff)
Strength-reduce BILINEAR filter to NEAREST filter for identity transforms
An image with a bilinear filter and an identity transform is equivalent to one with a nearest filter, so there is no reason the standard fast paths shouldn't be usable. But because a BILINEAR filter samples a 2x2 pixel block in the source image, FAST_PATH_SAMPLES_COVER_CLIP can't be set in the case where the source area is the entire image, because some compositing operations might then read pixels outside the image. This patch fixes the problem by splitting the FAST_PATH_SAMPLES_COVER_CLIP flag into two separate flags FAST_PATH_SAMPLES_COVER_CLIP_NEAREST and FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR that indicate that the clip covers the samples taking into account NEAREST/BILINEAR filters respectively. All the existing compositing operations that require FAST_PATH_SAMPLES_COVER_CLIP then have their flags modified to pick either COVER_CLIP_NEAREST or COVER_CLIP_BILINEAR depending on which filter they depend on. In compute_image_info() both COVER_CILP_NEAREST and COVER_CLIP_BILINEAR can be set depending on how much room there is around the clip rectangle. Finally, images with an identity transform and a bilinear filter get FAST_PATH_NEAREST_FILTER set as well as FAST_PATH_BILINEAR_FILTER. Performance measurementas with render_bench against Xephyr: Before *** ROUND 1 *** --------------------------------------------------------------- Test: Test Xrender doing non-scaled Over blends Time: 5.720 sec. --------------------------------------------------------------- Test: Test Xrender (offscreen) doing non-scaled Over blends Time: 5.149 sec. --------------------------------------------------------------- Test: Test Imlib2 doing non-scaled Over blends Time: 6.237 sec. After: *** ROUND 1 *** --------------------------------------------------------------- Test: Test Xrender doing non-scaled Over blends Time: 4.947 sec. --------------------------------------------------------------- Test: Test Xrender (offscreen) doing non-scaled Over blends Time: 4.487 sec. --------------------------------------------------------------- Test: Test Imlib2 doing non-scaled Over blends Time: 6.235 sec.
-rw-r--r--pixman/pixman-fast-path.c2
-rw-r--r--pixman/pixman-image.c4
-rw-r--r--pixman/pixman-inlines.h12
-rw-r--r--pixman/pixman-private.h19
-rw-r--r--pixman/pixman.c63
5 files changed, 62 insertions, 38 deletions
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index bbdc8e8b..033efd7b 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1764,7 +1764,7 @@ static const pixman_fast_path_t c_fast_paths[] =
#define SIMPLE_ROTATE_FLAGS(angle) \
(FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \
FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_SAMPLES_COVER_CLIP | \
+ FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | \
FAST_PATH_STANDARD_FLAGS)
#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 84bacf87..88262f71 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -250,6 +250,10 @@ compute_image_info (pixman_image_t *image)
case PIXMAN_FILTER_GOOD:
case PIXMAN_FILTER_BEST:
flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+
+ /* Reduce BILINEAR to NEAREST for identity transforms */
+ if (flags & FAST_PATH_ID_TRANSFORM)
+ flags |= FAST_PATH_NEAREST_FILTER;
break;
case PIXMAN_FILTER_CONVOLUTION:
diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
index f1e0cbd7..3532867a 100644
--- a/pixman/pixman-inlines.h
+++ b/pixman/pixman-inlines.h
@@ -585,7 +585,7 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
@@ -627,7 +627,7 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
@@ -669,7 +669,7 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
@@ -1157,7 +1157,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
- SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
@@ -1199,7 +1199,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
- SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
@@ -1241,7 +1241,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
- SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 90d9011d..6e716c6e 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -609,14 +609,15 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
#define FAST_PATH_IS_OPAQUE (1 << 13)
#define FAST_PATH_NO_NORMAL_REPEAT (1 << 14)
#define FAST_PATH_NO_NONE_REPEAT (1 << 15)
-#define FAST_PATH_SAMPLES_COVER_CLIP (1 << 16)
-#define FAST_PATH_X_UNIT_POSITIVE (1 << 17)
-#define FAST_PATH_AFFINE_TRANSFORM (1 << 18)
-#define FAST_PATH_Y_UNIT_ZERO (1 << 19)
-#define FAST_PATH_BILINEAR_FILTER (1 << 20)
-#define FAST_PATH_ROTATE_90_TRANSFORM (1 << 21)
-#define FAST_PATH_ROTATE_180_TRANSFORM (1 << 22)
-#define FAST_PATH_ROTATE_270_TRANSFORM (1 << 23)
+#define FAST_PATH_X_UNIT_POSITIVE (1 << 16)
+#define FAST_PATH_AFFINE_TRANSFORM (1 << 17)
+#define FAST_PATH_Y_UNIT_ZERO (1 << 18)
+#define FAST_PATH_BILINEAR_FILTER (1 << 19)
+#define FAST_PATH_ROTATE_90_TRANSFORM (1 << 20)
+#define FAST_PATH_ROTATE_180_TRANSFORM (1 << 21)
+#define FAST_PATH_ROTATE_270_TRANSFORM (1 << 22)
+#define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (1 << 23)
+#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24)
#define FAST_PATH_PAD_REPEAT \
(FAST_PATH_NO_NONE_REPEAT | \
@@ -652,7 +653,7 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
#define SOURCE_FLAGS(format) \
(FAST_PATH_STANDARD_FLAGS | \
((PIXMAN_ ## format == PIXMAN_solid) ? \
- 0 : (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_ID_TRANSFORM)))
+ 0 : (FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | FAST_PATH_NEAREST_FILTER | FAST_PATH_ID_TRANSFORM)))
#define MASK_FLAGS(format, extra) \
((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra))
diff --git a/pixman/pixman.c b/pixman/pixman.c
index 19eda086..87f5a933 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -524,9 +524,9 @@ analyze_extent (pixman_image_t *image,
uint32_t *flags)
{
pixman_transform_t *transform;
- pixman_fixed_t *params;
pixman_fixed_t x_off, y_off;
pixman_fixed_t width, height;
+ pixman_fixed_t *params;
box_48_16_t transformed;
pixman_box32_t exp_extents;
@@ -556,15 +556,13 @@ analyze_extent (pixman_image_t *image,
if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff)
return FALSE;
-#define ID_AND_NEAREST (FAST_PATH_ID_TRANSFORM | FAST_PATH_NEAREST_FILTER)
-
- if ((image->common.flags & ID_AND_NEAREST) == ID_AND_NEAREST &&
+ if ((image->common.flags & FAST_PATH_ID_TRANSFORM) == FAST_PATH_ID_TRANSFORM &&
extents->x1 >= 0 &&
extents->y1 >= 0 &&
extents->x2 <= image->bits.width &&
extents->y2 <= image->bits.height)
{
- *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
+ *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
return TRUE;
}
@@ -614,18 +612,28 @@ analyze_extent (pixman_image_t *image,
* may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
* 0.5 so this won't cause the area computed to be overly pessimistic.
*/
- transformed.x1 += x_off - 8 * pixman_fixed_e;
- transformed.y1 += y_off - 8 * pixman_fixed_e;
- transformed.x2 += x_off + width + 8 * pixman_fixed_e;
- transformed.y2 += y_off + height + 8 * pixman_fixed_e;
-
- if (image->common.type == BITS &&
- pixman_fixed_to_int (transformed.x1) >= 0 &&
- pixman_fixed_to_int (transformed.y1) >= 0 &&
- pixman_fixed_to_int (transformed.x2) < image->bits.width &&
- pixman_fixed_to_int (transformed.y2) < image->bits.height)
+ transformed.x1 -= 8 * pixman_fixed_e;
+ transformed.y1 -= 8 * pixman_fixed_e;
+ transformed.x2 += 8 * pixman_fixed_e;
+ transformed.y2 += 8 * pixman_fixed_e;
+
+ if (image->common.type == BITS)
{
- *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
+ if (pixman_fixed_to_int (transformed.x1) >= 0 &&
+ pixman_fixed_to_int (transformed.y1) >= 0 &&
+ pixman_fixed_to_int (transformed.x2) < image->bits.width &&
+ pixman_fixed_to_int (transformed.y2) < image->bits.height)
+ {
+ *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
+ }
+
+ if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2) >= 0 &&
+ pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2) >= 0 &&
+ pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2) < image->bits.width &&
+ pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2) < image->bits.height)
+ {
+ *flags |= FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR;
+ }
}
/* Check we don't overflow when the destination extents are expanded by one.
@@ -753,16 +761,27 @@ pixman_image_composite32 (pixman_op_t op,
if (!analyze_extent (mask, &extents, &mask_flags))
goto out;
- /* If the clip is within the source samples, and the samples are opaque,
- * then the source is effectively opaque.
+ /* If the clip is within the source samples, and the samples are
+ * opaque, then the source is effectively opaque.
*/
-#define BOTH (FAST_PATH_SAMPLES_OPAQUE | FAST_PATH_SAMPLES_COVER_CLIP)
-
- if ((src_flags & BOTH) == BOTH)
+#define NEAREST_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \
+ FAST_PATH_NEAREST_FILTER | \
+ FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+#define BILINEAR_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \
+ FAST_PATH_BILINEAR_FILTER | \
+ FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR)
+
+ if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+ (src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+ {
src_flags |= FAST_PATH_IS_OPAQUE;
+ }
- if ((mask_flags & BOTH) == BOTH)
+ if ((mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+ (mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+ {
mask_flags |= FAST_PATH_IS_OPAQUE;
+ }
/*
* Check if we can replace our operator by a simpler one