summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2010-05-21 14:33:18 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2010-05-24 18:31:16 +0100
commit: ea07535240dafc4c6ef55b4b7a2eeaa595febe86 (patch)
tree: 34d5d5ba34da6dfb40255a3627e0b4387242c1a8
parent: 80a9e64f50aeda6004e3aba1fbfdda50bb1f1c82 (diff)
i915: Emit CA over using OutReverse + Add passes
On PineView: 578/621 -> 610/617 kglyphs/sec [rgb/aa]
-rw-r--r-- src/i830.h 1
-rw-r--r-- src/i830_uxa.c 1
-rw-r--r-- src/i915_3d.c 7
-rw-r--r-- src/i915_render.c 306
4 files changed, 183 insertions, 132 deletions
diff --git a/src/i830.h b/src/i830.h
index 229a4e68..a69f60de 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -344,6 +344,7 @@ typedef struct intel_screen_private {
Bool render_mask_is_solid;
Bool needs_render_state_emit;
Bool needs_render_vertex_emit;
+ Bool needs_render_ca_pass;
/* i830 render accel state */
uint32_t render_dest_format;
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 0a6b6f80..a2da530c 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -1055,7 +1055,6 @@ Bool i830_uxa_init(ScreenPtr screen)
intel->uxa_driver->uxa_major = 1;
intel->uxa_driver->uxa_minor = 0;
- intel->needs_render_vertex_emit = TRUE;
intel->prim_offset = 0;
intel->vertex_count = 0;
intel->floats_per_vertex = 0;
diff --git a/src/i915_3d.c b/src/i915_3d.c
index 7f07b4bc..906043b1 100644
--- a/src/i915_3d.c
+++ b/src/i915_3d.c
@@ -85,8 +85,13 @@ void I915EmitInvarientState(ScrnInfoPtr scrn)
ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | 0);
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */
+ OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
+ S4_LINE_WIDTH_ONE |
+ S4_CULLMODE_NONE |
+ S4_VFMT_XY);
+ OUT_BATCH(0x00000000); /* Stencil. */
OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
diff --git a/src/i915_render.c b/src/i915_render.c
index 4eb40466..3d38397c 100644
--- a/src/i915_render.c
+++ b/src/i915_render.c
@@ -133,8 +133,10 @@ static uint32_t i915_get_blend_cntl(int op, PicturePtr mask,
}
}
- return (sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
- (dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
+ return S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+ (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
+ (sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
+ (dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
}
#define DSTORG_HORT_BIAS(x) ((x)<<20)
@@ -204,11 +206,13 @@ i915_check_composite(int op,
*/
if (i915_blend_op[op].src_alpha &&
(i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
- intel_debug_fallback(scrn,
- "Component alpha not supported "
- "with source alpha and source "
- "value blending.\n");
- return FALSE;
+ if (op != PictOpOver) {
+ intel_debug_fallback(scrn,
+ "Component alpha not supported "
+ "with source alpha and source "
+ "value blending.\n");
+ return FALSE;
+ }
}
}
@@ -814,6 +818,23 @@ i915_prepare_composite(int op, PicturePtr source_picture,
if (!i830_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
return FALSE;
+
+ intel->needs_render_ca_pass = FALSE;
+ if (mask_picture != NULL && mask_picture->componentAlpha &&
+ PICT_FORMAT_RGB(mask_picture->format)) {
+ /* Check if it's component alpha that relies on a source alpha
+ * and on the source value. We can only get one of those
+ * into the single source value that we get to blend with.
+ */
+ if (i915_blend_op[op].src_alpha &&
+ (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
+ if (op != PictOpOver)
+ return FALSE;
+
+ intel->needs_render_ca_pass = TRUE;
+ }
+ }
+
intel->dst_coord_adjust = 0;
intel->src_coord_adjust = 0;
intel->mask_coord_adjust = 0;
@@ -902,6 +923,120 @@ i915_prepare_composite(int op, PicturePtr source_picture,
return TRUE;
}
+static void
+i915_composite_emit_shader(intel_screen_private *intel, CARD8 op)
+{ /* Build the fragment program for one composite pass.
+   *
+   * intel: screen state; the source/mask solidity flags, mask picture,
+   *        destination format and per-texture transforms are read from it.
+   * op:    Render operator; only consulted via i915_blend_op[op].src_alpha
+   *        when the mask is a component-alpha RGB picture.
+   *
+   * Every path ends by writing the output colour register FS_OC: either
+   * the source alone (no mask) or a source * mask product. The program
+   * is bracketed by FS_BEGIN()/FS_END().
+   */
+ PicturePtr mask_picture = intel->render_mask_picture;
+ PixmapPtr mask = intel->render_mask;
+ int src_reg, mask_reg;
+ Bool is_solid_src, is_solid_mask;
+ uint32_t dst_format = intel->i915_render_state.dst_format;
+ int tex_unit, t;
+ FS_LOCALS();
+
+ is_solid_src = intel->render_source_is_solid;
+ is_solid_mask = intel->render_mask_is_solid;
+
+ FS_BEGIN();
+
+ /* Declare the registers necessary for our program. */
+ t = 0; /* number of T/S register pairs claimed by the source (0 or 1) */
+ if (is_solid_src) {
+ i915_fs_dcl(FS_T8); /* solid source is read from T8; no sampler is declared */
+ src_reg = FS_T8;
+ } else {
+ i915_fs_dcl(FS_T0);
+ i915_fs_dcl(FS_S0);
+ t++;
+ }
+ if (!mask) {
+ /* No mask, so load directly to output color */
+ if (! is_solid_src) {
+ if (dst_format == COLR_BUF_8BIT)
+ src_reg = FS_R0; /* load to a temporary; the W swizzle is applied by the mov below */
+ else
+ src_reg = FS_OC;
+
+ if (i830_transform_is_affine(intel->transform[0]))
+ i915_fs_texld(src_reg, FS_S0, FS_T0);
+ else
+ i915_fs_texldp(src_reg, FS_S0, FS_T0); /* non-affine transform: presumably a projective load - confirm against i915_fs_texldp */
+ }
+
+ if (src_reg != FS_OC) { /* solid source or 8-bit destination: result still has to be moved to FS_OC */
+ if (dst_format == COLR_BUF_8BIT)
+ i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
+ else
+ i915_fs_mov(FS_OC, i915_fs_operand_reg(src_reg));
+ }
+ } else {
+ if (is_solid_mask) {
+ i915_fs_dcl(FS_T9); /* solid mask is read from T9; no sampler is declared */
+ mask_reg = FS_T9;
+ } else {
+ i915_fs_dcl(FS_T0 + t); /* mask takes the next T/S pair after the source, if the source used one */
+ i915_fs_dcl(FS_S0 + t);
+ }
+
+ tex_unit = 0; /* index into intel->transform[]; advanced past the source when the source is textured */
+ if (! is_solid_src) {
+ /* Load the source_picture texel */
+ if (i830_transform_is_affine(intel->transform[tex_unit]))
+ i915_fs_texld(FS_R0, FS_S0, FS_T0);
+ else
+ i915_fs_texldp(FS_R0, FS_S0, FS_T0);
+
+ src_reg = FS_R0;
+ tex_unit++;
+ }
+
+ if (! is_solid_mask) {
+ /* Load the mask_picture texel */
+ if (i830_transform_is_affine(intel->transform[tex_unit]))
+ i915_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
+ else
+ i915_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
+
+ mask_reg = FS_R1;
+ }
+
+ if (dst_format == COLR_BUF_8BIT) { /* 8-bit destination: both operands use the W swizzle */
+ i915_fs_mul(FS_OC,
+ i915_fs_operand(src_reg, W, W, W, W),
+ i915_fs_operand(mask_reg, W, W, W, W));
+ } else {
+ /* If component alpha is active in the mask and the blend
+ * operation uses the source alpha, then we know we don't
+ * need the source value (otherwise we would have hit a
+ * fallback earlier), so we provide the source alpha (src.A *
+ * mask.X) as output color.
+ * Conversely, if CA is set and we don't need the source alpha,
+ * then we produce the source value (src.X * mask.X) and the
+ * source alpha is unused. Otherwise, we provide the non-CA
+ * source value (src.X * mask.A).
+ *
+ * NOTE(review): the component-alpha Over path added in this
+ * patch does not pass PictOpOver here; its callers invoke
+ * this function once with PictOpOutReverse and once with
+ * PictOpAdd, so `op` selects which of the two products
+ * above is generated for each pass.
+ */
+ if (mask_picture->componentAlpha &&
+ PICT_FORMAT_RGB(mask_picture->format)) {
+ if (i915_blend_op[op].src_alpha) {
+ i915_fs_mul(FS_OC,
+ i915_fs_operand(src_reg, W, W, W, W),
+ i915_fs_operand_reg(mask_reg));
+ } else {
+ i915_fs_mul(FS_OC,
+ i915_fs_operand_reg(src_reg),
+ i915_fs_operand_reg(mask_reg));
+ }
+ } else {
+ i915_fs_mul(FS_OC,
+ i915_fs_operand_reg(src_reg),
+ i915_fs_operand(mask_reg, W, W, W, W));
+ }
+ }
+ }
+
+ FS_END();
+}
+
static void i915_emit_composite_setup(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -911,8 +1046,7 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
PixmapPtr mask = intel->render_mask;
PixmapPtr dest = intel->render_dest;
uint32_t dst_format = intel->i915_render_state.dst_format, dst_pitch;
- uint32_t blendctl, tiling_bits;
- Bool is_affine_src, is_affine_mask;
+ uint32_t tiling_bits;
Bool is_solid_src, is_solid_mask;
int tex_count, t;
@@ -923,9 +1057,6 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
dst_pitch = intel_get_pixmap_pitch(dest);
- is_affine_src = i830_transform_is_affine(intel->transform[0]);
- is_affine_mask = i830_transform_is_affine(intel->transform[1]);
-
is_solid_src = intel->render_source_is_solid;
is_solid_mask = intel->render_mask_is_solid;
@@ -982,33 +1113,31 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
{
uint32_t ss2;
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
- I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
ss2 = ~0;
t = 0;
if (! is_solid_src) {
ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(t,
- is_affine_src ? TEXCOORDFMT_2D :
- TEXCOORDFMT_4D);
+ i830_transform_is_affine(intel->transform[t]) ?
+ TEXCOORDFMT_2D : TEXCOORDFMT_4D);
t++;
}
if (mask && ! is_solid_mask) {
ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(t,
- is_affine_mask ? TEXCOORDFMT_2D :
- TEXCOORDFMT_4D);
+ i830_transform_is_affine(intel->transform[t]) ?
+ TEXCOORDFMT_2D : TEXCOORDFMT_4D);
t++;
}
- OUT_BATCH(ss2);
- OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE |
- S4_CULLMODE_NONE | S4_VFMT_XY);
- blendctl =
- i915_get_blend_cntl(op, mask_picture, dest_picture->format);
- OUT_BATCH(0x00000000); /* Disable stencil buffer */
- OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
- (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
- blendctl);
+
+ if (intel->needs_render_ca_pass) {
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | 0);
+ OUT_BATCH(ss2);
+ } else {
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
+ OUT_BATCH(ss2);
+ OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format));
+ }
/* draw rect is unconditional */
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
@@ -1020,109 +1149,8 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
OUT_BATCH(0x00000000);
}
- {
- FS_LOCALS();
- int src_reg, mask_reg;
-
- FS_BEGIN();
-
- /* Declare the registers necessary for our program. */
- t = 0;
- if (is_solid_src) {
- i915_fs_dcl(FS_T8);
- src_reg = FS_T8;
- } else {
- i915_fs_dcl(FS_T0);
- i915_fs_dcl(FS_S0);
- t++;
- }
- if (!mask) {
- /* No mask, so load directly to output color */
- if (! is_solid_src) {
- if (dst_format == COLR_BUF_8BIT)
- src_reg = FS_R0;
- else
- src_reg = FS_OC;
-
- if (is_affine_src)
- i915_fs_texld(src_reg, FS_S0, FS_T0);
- else
- i915_fs_texldp(src_reg, FS_S0, FS_T0);
- }
-
- if (src_reg != FS_OC) {
- if (dst_format == COLR_BUF_8BIT)
- i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
- else
- i915_fs_mov(FS_OC, i915_fs_operand_reg(src_reg));
- }
- } else {
- if (is_solid_mask) {
- i915_fs_dcl(FS_T9);
- mask_reg = FS_T9;
- } else {
- i915_fs_dcl(FS_T0 + t);
- i915_fs_dcl(FS_S0 + t);
- }
-
- if (! is_solid_src) {
- /* Load the source_picture texel */
- if (is_affine_src) {
- i915_fs_texld(FS_R0, FS_S0, FS_T0);
- } else {
- i915_fs_texldp(FS_R0, FS_S0, FS_T0);
- }
-
- src_reg = FS_R0;
- }
-
- if (! is_solid_mask) {
- /* Load the mask_picture texel */
- if (is_affine_mask) {
- i915_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
- } else {
- i915_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
- }
-
- mask_reg = FS_R1;
- }
-
- if (dst_format == COLR_BUF_8BIT) {
- i915_fs_mul(FS_OC,
- i915_fs_operand(src_reg, W, W, W, W),
- i915_fs_operand(mask_reg, W, W, W, W));
- } else {
- /* If component alpha is active in the mask and the blend
- * operation uses the source alpha, then we know we don't
- * need the source value (otherwise we would have hit a
- * fallback earlier), so we provide the source alpha (src.A *
- * mask.X) as output color.
- * Conversely, if CA is set and we don't need the source alpha,
- * then we produce the source value (src.X * mask.X) and the
- * source alpha is unused. Otherwise, we provide the non-CA
- * source value (src.X * mask.A).
- */
- if (mask_picture->componentAlpha &&
- PICT_FORMAT_RGB(mask_picture->format)) {
- if (i915_blend_op[op].src_alpha) {
- i915_fs_mul(FS_OC,
- i915_fs_operand(src_reg, W, W, W, W),
- i915_fs_operand_reg(mask_reg));
- } else {
- i915_fs_mul(FS_OC,
- i915_fs_operand_reg(src_reg),
- i915_fs_operand_reg(mask_reg));
- }
- } else {
- i915_fs_mul(FS_OC,
- i915_fs_operand_reg(src_reg),
- i915_fs_operand(mask_reg, W, W, W, W));
- }
- }
- }
-
- FS_END();
- }
+ if (! intel->needs_render_ca_pass)
+ i915_composite_emit_shader(intel, op);
}
void
@@ -1168,6 +1196,14 @@ i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
}
if (intel->prim_offset == 0) {
+ if (intel->needs_render_ca_pass) {
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
+ OUT_BATCH(i915_get_blend_cntl(PictOpOutReverse,
+ intel->render_mask_picture,
+ intel->render_dest_picture->format));
+ i915_composite_emit_shader(intel, PictOpOutReverse);
+ }
+
intel->prim_offset = intel->batch_used;
OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
OUT_BATCH(intel->vertex_index);
@@ -1192,6 +1228,16 @@ i915_vertex_flush(intel_screen_private *intel)
intel->batch_ptr[intel->prim_offset] |= intel->vertex_count;
intel->prim_offset = 0;
+ if (intel->needs_render_ca_pass) {
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
+ OUT_BATCH(i915_get_blend_cntl(PictOpAdd,
+ intel->render_mask_picture,
+ intel->render_dest_picture->format));
+ i915_composite_emit_shader(intel, PictOpAdd);
+ OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | intel->vertex_count);
+ OUT_BATCH(intel->vertex_index);
+ }
+
intel->vertex_index += intel->vertex_count;
intel->vertex_count = 0;
}