diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2011-10-21 23:45:28 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2011-10-26 10:37:31 +0100 |
commit | 0ed3426a810336b666604d34c10f996f318ebf82 (patch) | |
tree | 8546cd4aaba1df67f4d4903a41cd13998212b1a3 | |
parent | c12371d9e7b3bbff7f318186a0933d6108db0bc8 (diff) |
sna: Convert diagonal zero-width lines into blits
Although this is slower than falling back to swrast for x11perf (up to 4x
slower on SNB), it is still faster than doing that rasterisation through a
WC-mapping, and much faster in ordinary usage because it avoids the
readback hit.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/sna/gen2_render.c | 50 | ||||
-rw-r--r-- | src/sna/gen3_render.c | 52 | ||||
-rw-r--r-- | src/sna/gen4_render.c | 35 | ||||
-rw-r--r-- | src/sna/gen5_render.c | 60 | ||||
-rw-r--r-- | src/sna/gen6_render.c | 57 | ||||
-rw-r--r-- | src/sna/gen7_render.c | 55 | ||||
-rw-r--r-- | src/sna/sna_accel.c | 731 | ||||
-rw-r--r-- | src/sna/sna_blt.c | 67 | ||||
-rw-r--r-- | src/sna/sna_render.h | 4 |
9 files changed, 1020 insertions, 91 deletions
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c index 7c72f416..3ec2e084 100644 --- a/src/sna/gen2_render.c +++ b/src/sna/gen2_render.c @@ -2021,9 +2021,9 @@ static void gen2_emit_fill_state(struct sna *sna, } static void -gen2_render_fill_blt(struct sna *sna, - const struct sna_fill_op *op, - int16_t x, int16_t y, int16_t w, int16_t h) +gen2_render_fill_op_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) { if (!gen2_get_rectangles(sna, &op->base, 1)) { gen2_emit_fill_state(sna, &op->base); @@ -2039,9 +2039,9 @@ gen2_render_fill_blt(struct sna *sna, } fastcall static void -gen2_render_fill_box(struct sna *sna, - const struct sna_fill_op *op, - const BoxRec *box) +gen2_render_fill_op_box(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box) { if (!gen2_get_rectangles(sna, &op->base, 1)) { gen2_emit_fill_state(sna, &op->base); @@ -2056,8 +2056,37 @@ gen2_render_fill_box(struct sna *sna, VERTEX(box->y1); } +fastcall static void +gen2_render_fill_op_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + DBG(("%s: (%d, %d),(%d, %d)... 
x %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, n)); + + do { + int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); + if (nbox_this_time == 0) { + gen2_emit_fill_state(sna, &op->base); + nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); + } + nbox -= nbox_this_time; + + do { + VERTEX(box->x2); + VERTEX(box->y2); + VERTEX(box->x1); + VERTEX(box->y2); + VERTEX(box->x1); + VERTEX(box->y1); + box++; + } while (--nbox_this_time); + } while (nbox); +} + static void -gen2_render_fill_done(struct sna *sna, const struct sna_fill_op *op) +gen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) { gen2_vertex_flush(sna); _kgem_set_mode(&sna->kgem, KGEM_RENDER); @@ -2114,9 +2143,10 @@ gen2_render_fill(struct sna *sna, uint8_t alu, tmp); } - tmp->blt = gen2_render_fill_blt; - tmp->box = gen2_render_fill_box; - tmp->done = gen2_render_fill_done; + tmp->blt = gen2_render_fill_op_blt; + tmp->box = gen2_render_fill_op_box; + tmp->boxes = gen2_render_fill_op_boxes; + tmp->done = gen2_render_fill_op_done; gen2_emit_fill_state(sna, &tmp->base); return TRUE; diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c index 6d91f32b..b9ce45f0 100644 --- a/src/sna/gen3_render.c +++ b/src/sna/gen3_render.c @@ -3776,9 +3776,9 @@ gen3_render_fill_boxes(struct sna *sna, } static void -gen3_render_fill_blt(struct sna *sna, - const struct sna_fill_op *op, - int16_t x, int16_t y, int16_t w, int16_t h) +gen3_render_fill_op_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) { if (!gen3_get_rectangles(sna, &op->base, 1)) { gen3_emit_composite_state(sna, &op->base); @@ -3794,9 +3794,9 @@ gen3_render_fill_blt(struct sna *sna, } fastcall static void -gen3_render_fill_box(struct sna *sna, - const struct sna_fill_op *op, - const BoxRec *box) +gen3_render_fill_op_box(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box) { if (!gen3_get_rectangles(sna, &op->base, 1)) { 
gen3_emit_composite_state(sna, &op->base); @@ -3811,8 +3811,37 @@ gen3_render_fill_box(struct sna *sna, OUT_VERTEX(box->y1); } +fastcall static void +gen3_render_fill_op_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, n)); + + do { + int nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + if (nbox_this_time == 0) { + gen3_emit_composite_state(sna, &op->base); + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + } + nbox -= nbox_this_time; + + do { + OUT_VERTEX(box->x2); + OUT_VERTEX(box->y2); + OUT_VERTEX(box->x1); + OUT_VERTEX(box->y2); + OUT_VERTEX(box->x1); + OUT_VERTEX(box->y1); + box++; + } while (--nbox_this_time); + } while (nbox); +} + static void -gen3_render_fill_done(struct sna *sna, const struct sna_fill_op *op) +gen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) { gen3_vertex_flush(sna); _kgem_set_mode(&sna->kgem, KGEM_RENDER); @@ -3859,6 +3888,8 @@ gen3_render_fill(struct sna *sna, uint8_t alu, tmp->base.dst.bo = dst_bo; tmp->base.floats_per_vertex = 2; tmp->base.floats_per_rect = 6; + tmp->base.need_magic_ca_pass = 0; + tmp->base.has_component_alpha = 0; tmp->base.src.u.gen3.type = SHADER_CONSTANT; tmp->base.src.u.gen3.mode = @@ -3869,9 +3900,10 @@ gen3_render_fill(struct sna *sna, uint8_t alu, if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) kgem_submit(&sna->kgem); - tmp->blt = gen3_render_fill_blt; - tmp->box = gen3_render_fill_box; - tmp->done = gen3_render_fill_done; + tmp->blt = gen3_render_fill_op_blt; + tmp->box = gen3_render_fill_op_box; + tmp->boxes = gen3_render_fill_op_boxes; + tmp->done = gen3_render_fill_op_done; gen3_emit_composite_state(sna, &tmp->base); gen3_align_vertex(sna, &tmp->base); diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index 0092f603..e4a40fc3 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -2497,24 +2497,38 @@ 
gen4_render_fill_boxes(struct sna *sna, } static void -gen4_render_fill_blt(struct sna *sna, const struct sna_fill_op *op, - int16_t x, int16_t y, int16_t w, int16_t h) +gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) { gen4_render_fill_rectangle(sna, &op->base, x, y, w, h); } fastcall static void -gen4_render_fill_box(struct sna *sna, - const struct sna_fill_op *op, - const BoxRec *box) +gen4_render_fill_op_box(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box) { gen4_render_fill_rectangle(sna, &op->base, box->x1, box->y1, box->x2-box->x1, box->y2-box->y1); } +fastcall static void +gen4_render_fill_op_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + do { + gen4_render_fill_rectangle(sna, &op->base, + box->x1, box->y1, + box->x2-box->x1, box->y2-box->y1); + box++; + } while (--nbox); +} + static void -gen4_render_fill_done(struct sna *sna, const struct sna_fill_op *op) +gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) { gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -2573,6 +2587,8 @@ gen4_render_fill(struct sna *sna, uint8_t alu, op->base.is_affine = TRUE; op->base.floats_per_vertex = 3; + op->base.need_magic_ca_pass = 0; + op->base.has_component_alpha = 0; op->base.u.gen4.wm_kernel = WM_KERNEL; op->base.u.gen4.ve_id = 1; @@ -2582,9 +2598,10 @@ gen4_render_fill(struct sna *sna, uint8_t alu, gen4_fill_bind_surfaces(sna, &op->base); gen4_align_vertex(sna, &op->base); - op->blt = gen4_render_fill_blt; - op->box = gen4_render_fill_box; - op->done = gen4_render_fill_done; + op->blt = gen4_render_fill_op_blt; + op->box = gen4_render_fill_op_box; + op->boxes = gen4_render_fill_op_boxes; + op->done = gen4_render_fill_op_done; return TRUE; } diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index 041e9186..e72283e0 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ 
-2483,9 +2483,9 @@ gen5_render_fill_boxes(struct sna *sna, } static void -gen5_render_fill_blt(struct sna *sna, - const struct sna_fill_op *op, - int16_t x, int16_t y, int16_t w, int16_t h) +gen5_render_fill_op_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) { DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x,y,w,h)); @@ -2508,9 +2508,9 @@ gen5_render_fill_blt(struct sna *sna, } fastcall static void -gen5_render_fill_box(struct sna *sna, - const struct sna_fill_op *op, - const BoxRec *box) +gen5_render_fill_op_box(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box) { DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); @@ -2533,9 +2533,43 @@ gen5_render_fill_box(struct sna *sna, OUT_VERTEX_F(0); } +fastcall static void +gen5_render_fill_op_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, n)); + + do { + int nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox); + if (nbox_this_time == 0) { + gen5_fill_bind_surfaces(sna, &op->base); + nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox); + } + nbox -= nbox_this_time; + + do { + OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(1); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(0); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(0); + OUT_VERTEX_F(0); + box++; + } while (--nbox_this_time); + } while (nbox); +} + static void -gen5_render_fill_done(struct sna *sna, - const struct sna_fill_op *op) +gen5_render_fill_op_done(struct sna *sna, + const struct sna_fill_op *op) { gen5_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -2578,6 +2612,9 @@ gen5_render_fill(struct sna *sna, uint8_t alu, op->base.dst.bo = dst_bo; op->base.dst.x = op->base.dst.y = 0; + op->base.need_magic_ca_pass = 0; + op->base.has_component_alpha = 0; + 
op->base.src.bo = sna_render_get_solid(sna, sna_rgba_for_color(color, @@ -2600,9 +2637,10 @@ gen5_render_fill(struct sna *sna, uint8_t alu, gen5_fill_bind_surfaces(sna, &op->base); gen5_align_vertex(sna, &op->base); - op->blt = gen5_render_fill_blt; - op->box = gen5_render_fill_box; - op->done = gen5_render_fill_done; + op->blt = gen5_render_fill_op_blt; + op->box = gen5_render_fill_op_box; + op->boxes = gen5_render_fill_op_boxes; + op->done = gen5_render_fill_op_done; return TRUE; } diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 838819d1..05e6d633 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -2696,9 +2696,9 @@ gen6_render_fill_boxes(struct sna *sna, } static void -gen6_render_fill_blt(struct sna *sna, - const struct sna_fill_op *op, - int16_t x, int16_t y, int16_t w, int16_t h) +gen6_render_op_fill_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) { DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); @@ -2721,9 +2721,9 @@ gen6_render_fill_blt(struct sna *sna, } fastcall static void -gen6_render_fill_box(struct sna *sna, - const struct sna_fill_op *op, - const BoxRec *box) +gen6_render_op_fill_box(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box) { DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); @@ -2746,8 +2746,42 @@ gen6_render_fill_box(struct sna *sna, OUT_VERTEX_F(0); } +fastcall static void +gen6_render_op_fill_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + DBG(("%s: (%d, %d),(%d, %d)... 
x %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, n)); + + do { + int nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox); + if (nbox_this_time == 0) { + gen6_emit_fill_state(sna, &op->base); + nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox); + } + nbox -= nbox_this_time; + + do { + OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(1); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(0); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(0); + OUT_VERTEX_F(0); + box++; + } while (--nbox_this_time); + } while (nbox); +} + static void -gen6_render_fill_done(struct sna *sna, const struct sna_fill_op *op) +gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) { gen6_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -2807,6 +2841,8 @@ gen6_render_fill(struct sna *sna, uint8_t alu, op->base.mask.repeat = SAMPLER_EXTEND_NONE; op->base.is_affine = TRUE; + op->base.has_component_alpha = FALSE; + op->base.need_magic_ca_pass = FALSE; op->base.floats_per_vertex = 3; op->base.floats_per_rect = 9; @@ -2821,9 +2857,10 @@ gen6_render_fill(struct sna *sna, uint8_t alu, gen6_emit_fill_state(sna, &op->base); gen6_align_vertex(sna, &op->base); - op->blt = gen6_render_fill_blt; - op->box = gen6_render_fill_box; - op->done = gen6_render_fill_done; + op->blt = gen6_render_op_fill_blt; + op->box = gen6_render_op_fill_box; + op->boxes = gen6_render_op_fill_boxes; + op->done = gen6_render_op_fill_done; return TRUE; } diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index ccc00373..05b65f39 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -2846,9 +2846,9 @@ gen7_render_fill_boxes(struct sna *sna, } static void -gen7_render_fill_blt(struct sna *sna, - const struct sna_fill_op *op, - int16_t x, int16_t y, int16_t w, int16_t h) +gen7_render_fill_op_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) { DBG(("%s: (%d, %d)x(%d, 
%d)\n", __FUNCTION__, x, y, w, h)); @@ -2871,9 +2871,9 @@ gen7_render_fill_blt(struct sna *sna, } fastcall static void -gen7_render_fill_box(struct sna *sna, - const struct sna_fill_op *op, - const BoxRec *box) +gen7_render_fill_op_box(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box) { DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); @@ -2896,8 +2896,42 @@ gen7_render_fill_box(struct sna *sna, OUT_VERTEX_F(0); } +fastcall static void +gen7_render_fill_op_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, n)); + + do { + int nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox); + if (nbox_this_time == 0) { + gen7_emit_fill_state(sna, &op->base); + nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox); + } + nbox -= nbox_this_time; + + do { + OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(1); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(0); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(0); + OUT_VERTEX_F(0); + box++; + } while (--nbox_this_time); + } while (nbox); +} + static void -gen7_render_fill_done(struct sna *sna, const struct sna_fill_op *op) +gen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) { gen7_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -2971,9 +3005,10 @@ gen7_render_fill(struct sna *sna, uint8_t alu, gen7_emit_fill_state(sna, &op->base); gen7_align_vertex(sna, &op->base); - op->blt = gen7_render_fill_blt; - op->box = gen7_render_fill_box; - op->done = gen7_render_fill_done; + op->blt = gen7_render_fill_op_blt; + op->box = gen7_render_fill_op_box; + op->boxes = gen7_render_fill_op_boxes; + op->done = gen7_render_fill_op_done; return TRUE; } diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index 6c45b688..e73804df 100644 --- a/src/sna/sna_accel.c +++ 
b/src/sna/sna_accel.c @@ -43,6 +43,7 @@ #include <mipict.h> #include <fbpict.h> #endif +#include <miline.h> #include <sys/time.h> #include <sys/mman.h> @@ -60,6 +61,7 @@ #define FORCE_FLUSH 0 #define USE_SPANS 0 +#define USE_ZERO_SPANS 1 DevPrivateKeyRec sna_pixmap_index; DevPrivateKey sna_window_key; @@ -2434,6 +2436,363 @@ fallback: fbPolyPoint(drawable, gc, mode, n, pt); } +static bool +sna_poly_zero_line_blt(DrawablePtr drawable, + struct kgem_bo *bo, + struct sna_damage **damage, + GCPtr gc, int mode, const int _n, const DDXPointRec * const _pt, + const BoxRec *extents, unsigned clipped) +{ + static void * const _jump[] = { + &&no_damage, + &&damage, + + &&no_damage_offset, + &&damage_offset, + }; + + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + int x2, y2, xstart, ystart; + int oc2, pt2_clipped = 0; + unsigned int bias = miGetZeroLineBias(drawable->pScreen); + bool degenerate = true; + struct sna_fill_op fill; + RegionRec clip; + BoxRec box[512], *b, * const last_box = box + ARRAY_SIZE(box); + const BoxRec *last_extents; + int16_t dx, dy; + void *jump, *ret; + + DBG(("%s: alu=%d, pixel=%lx, n=%d, clipped=%d, damage=%p\n", + __FUNCTION__, gc->alu, gc->fgPixel, n, clipped, damage)); + if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel)) + return FALSE; + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + region_set(&clip, extents); + if (clipped) + region_maybe_clip(&clip, gc->pCompositeClip); + + jump = _jump[(damage != NULL) | !!(dx|dy) << 1]; + DBG(("%s: [clipped] extents=(%d, %d), (%d, %d), delta=(%d, %d)\n", + __FUNCTION__, + clip.extents.x1, clip.extents.y1, + clip.extents.x2, clip.extents.y2, + dx, dy)); + + extents = REGION_RECTS(&clip); + last_extents = extents + REGION_NUM_RECTS(&clip); + + b = box; + do { + int n = _n; + const DDXPointRec *pt = _pt; + + xstart = pt->x + drawable->x; + ystart = pt->y + drawable->y; + + /* x2, y2, oc2 copied to x1, y1, oc1 at top of loop to 
simplify + * iteration logic + */ + x2 = xstart; + y2 = ystart; + oc2 = 0; + MIOUTCODES(oc2, x2, y2, + clip.extents.x1, + clip.extents.y1, + clip.extents.x2, + clip.extents.y2); + + while (--n) { + int16_t sdx, sdy; + int16_t adx, ady; + int16_t e, e1, e2, e3; + int16_t length; + int x1 = x2, x; + int y1 = y2, y; + int oc1 = oc2; + int octant; + + ++pt; + + x2 = pt->x; + y2 = pt->y; + if (mode == CoordModePrevious) { + x2 += x1; + y2 += y1; + } else { + x2 += drawable->x; + y2 += drawable->y; + } + DBG(("%s: segment (%d, %d) to (%d, %d)\n", + __FUNCTION__, x1, y1, x2, y2)); + if (x2 == x1 && y2 == y1) + continue; + + degenerate = false; + + oc2 = 0; + MIOUTCODES(oc2, x2, y2, + clip.extents.x1, + clip.extents.y1, + clip.extents.x2, + clip.extents.y2); + if (oc1 & oc2) + continue; + + CalcLineDeltas(x1, y1, x2, y2, + adx, ady, sdx, sdy, + 1, 1, octant); + + DBG(("%s: adx=(%d, %d), sdx=(%d, %d)\n", + __FUNCTION__, adx, ady, sdx, sdy)); + if (adx == 0 || ady == 0) { + if (x1 <= x2) { + b->x1 = x1; + b->x2 = x2; + } else { + b->x1 = x2; + b->x2 = x1; + } + if (y1 <= y2) { + b->y1 = y1; + b->y2 = y2; + } else { + b->y1 = y2; + b->y2 = y1; + } + b->x2++; + b->y2++; + if (oc1 | oc2) + box_intersect(b, &clip.extents); + if (++b == last_box) { + ret = &&rectangle_continue; + goto *jump; +rectangle_continue: + b = box; + } + } else if (adx >= ady) { + /* X-major segment */ + e1 = ady << 1; + e2 = e1 - (adx << 1); + e = e1 - adx; + length = adx; /* don't draw endpoint in main loop */ + + FIXUP_ERROR(e, octant, bias); + + x = x1; + y = y1; + pt2_clipped = 0; + + if (oc1 | oc2) { + int x2_clipped = x2, y2_clipped = y2; + int pt1_clipped; + + if (miZeroClipLine(clip.extents.x1, clip.extents.y1, + clip.extents.x2, clip.extents.y2, + &x, &y, &x2_clipped, &y2_clipped, + adx, ady, + &pt1_clipped, &pt2_clipped, + octant, bias, oc1, oc2) == -1) + continue; + + length = abs(x2_clipped - x); + + /* if we've clipped the endpoint, always draw the full length + * of the segment, because 
then the capstyle doesn't matter + */ + if (pt2_clipped) + length++; + + if (pt1_clipped) { + int clipdx = abs(x - x1); + int clipdy = abs(y - y1); + e += clipdy * e2 + (clipdx - clipdy) * e1; + } + } + if (length == 0) + continue; + + e3 = e2 - e1; + e = e - e1; + + b->x1 = x; + b->y2 = b->y1 = y; + while (length--) { + e += e1; + if (e >= 0) { + b->x2 = x; + if (b->x2 < b->x1) { + int16_t t = b->x1; + b->x1 = b->x2; + b->x2 = t; + } + b->x2++; + b->y2++; + if (++b == last_box) { + ret = &&X_continue; + goto *jump; +X_continue: + b = box; + } + y += sdy; + e += e3; + b->y2 = b->y1 = y; + b->x1 = x; + } + x += sdx; + } + } else { + /* Y-major segment */ + e1 = adx << 1; + e2 = e1 - (ady << 1); + e = e1 - ady; + length = ady; /* don't draw endpoint in main loop */ + + SetYMajorOctant(octant); + FIXUP_ERROR(e, octant, bias); + + x = x1; + y = y1; + pt2_clipped = 0; + + if (oc1 | oc2) { + int x2_clipped = x2, y2_clipped = y2; + int pt1_clipped; + + if (miZeroClipLine(clip.extents.x1, + clip.extents.y1, + clip.extents.x2, + clip.extents.y2, + &x, &y, &x2_clipped, &y2_clipped, + adx, ady, + &pt1_clipped, &pt2_clipped, + octant, bias, oc1, oc2) == -1) + continue; + + length = abs(y2 - y); + + /* if we've clipped the endpoint, always draw the full length + * of the segment, because then the capstyle doesn't matter + */ + if (pt2_clipped) + length++; + + if (pt1_clipped) { + int clipdx = abs(x - x1); + int clipdy = abs(y - y1); + e += clipdx * e2 + (clipdy - clipdx) * e1; + } + } + if (length == 0) + continue; + + e3 = e2 - e1; + e = e - e1; + + b->x2 = b->x1 = x; + b->y1 = y; + while (length--) { + e += e1; + if (e >= 0) { + b->y2 = y; + if (b->y2 < b->y1) { + int16_t t = b->y1; + b->y1 = b->y2; + b->y2 = t; + } + b->x2++; + b->y2++; + if (++b == last_box) { + ret = &&Y_continue; + goto *jump; +Y_continue: + b = box; + } + x += sdx; + e += e3; + b->x2 = b->x1 = x; + b->y1 = y; + } + y += sdy; + } + } + } + +#if 0 + /* Only do the CapNotLast check on the last segment + * 
and only if the endpoint wasn't clipped. And then, if the last + * point is the same as the first point, do not draw it, unless the + * line is degenerate + */ + if (!pt2_clipped && + gc->capStyle != CapNotLast && + !(xstart == x2 && ystart == y2 && !degenerate)) + { + b->x2 = x2; + b->y2 = y2; + if (b->x2 < b->x1) { + int16_t t = b->x1; + b->x1 = b->x2; + b->x2 = t; + } + if (b->y2 < b->y1) { + int16_t t = b->y1; + b->y1 = b->y2; + b->y2 = t; + } + b->x2++; + b->y2++; + b++; + } +#endif + } while (++extents != last_extents); + + if (b != box) { + ret = &&done; + goto *jump; + } + +done: + fill.done(sna, &fill); + return true; + +damage: + sna_damage_add_boxes(damage, box, b-box, 0, 0); +no_damage: + fill.boxes(sna, &fill, box, b-box); + goto *ret; + +no_damage_offset: + { + BoxRec *bb = box; + do { + bb->x1 += dx; + bb->x2 += dx; + bb->y1 += dy; + bb->y2 += dy; + } while (++bb != b); + fill.boxes(sna, &fill, box, b - box); + } + goto *ret; + +damage_offset: + { + BoxRec *bb = box; + do { + bb->x1 += dx; + bb->x2 += dx; + bb->y1 += dy; + bb->y2 += dy; + } while (++bb != b); + fill.boxes(sna, &fill, box, b - box); + sna_damage_add_boxes(damage, box, b - box, 0, 0); + } + goto *ret; +} + static Bool sna_poly_line_blt(DrawablePtr drawable, struct kgem_bo *bo, @@ -2680,14 +3039,14 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc, flags & 2)); if (gc->fillStyle == FillSolid && gc->lineStyle == LineSolid && - (gc->lineWidth == 0 || gc->lineWidth == 1) && - PM_IS_SOLID(drawable, gc->planemask) && - flags & 2) { + gc->lineWidth <= 1 && + PM_IS_SOLID(drawable, gc->planemask)) { struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable); DBG(("%s: trying solid fill [%08lx]\n", __FUNCTION__, gc->fgPixel)); + if (flags & 2) { if (sna_drawable_use_gpu_bo(drawable, ®ion.extents) && sna_poly_line_blt(drawable, priv->gpu_bo, @@ -2701,6 +3060,17 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc, reduce_damage(drawable, &priv->cpu_damage, ®ion.extents), gc, mode, n, pt, flags & 4)) 
return; + } else { /* !rectilinear */ + if (USE_ZERO_SPANS && + sna_drawable_use_gpu_bo(drawable, ®ion.extents) && + sna_poly_zero_line_blt(drawable, + priv->gpu_bo, + priv->gpu_only ? NULL : reduce_damage(drawable, &priv->gpu_damage, ®ion.extents), + gc, mode, n, pt, + ®ion.extents, flags & 4)) + return; + + } } if (USE_SPANS && can_fill_spans(drawable, gc) && @@ -2745,22 +3115,6 @@ fallback: } static Bool -sna_poly_segment_can_blt(int n, xSegment *seg) -{ - while (n--) { - if (seg->x1 != seg->x2 && seg->y1 != seg->y2) { - DBG(("%s: (%d, %d) -> (%d, %d)\n", - __FUNCTION__, seg->x1, seg->y1, seg->x2, seg->y2)); - return FALSE; - } - - seg++; - } - - return TRUE; -} - -static Bool sna_poly_segment_blt(DrawablePtr drawable, struct kgem_bo *bo, struct sna_damage **damage, @@ -2895,16 +3249,320 @@ sna_poly_segment_blt(DrawablePtr drawable, return TRUE; } -static Bool +static bool +sna_poly_zero_segment_blt(DrawablePtr drawable, + struct kgem_bo *bo, + struct sna_damage **damage, + GCPtr gc, const int _n, const xSegment *_s, + const BoxRec *extents, unsigned clipped) +{ + static void * const _jump[] = { + &&no_damage, + &&damage, + + &&no_damage_offset, + &&damage_offset, + }; + + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + unsigned int bias = miGetZeroLineBias(drawable->pScreen); + struct sna_fill_op fill; + RegionRec clip; + const BoxRec *last_extents; + BoxRec box[512], *b; + BoxRec *const last_box = box + ARRAY_SIZE(box); + int16_t dx, dy; + void *jump, *ret; + + DBG(("%s: alu=%d, pixel=%lx, n=%d, clipped=%d, damage=%p\n", + __FUNCTION__, gc->alu, gc->fgPixel, n, clipped, damage)); + if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel)) + return FALSE; + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + region_set(&clip, extents); + if (clipped) + region_maybe_clip(&clip, gc->pCompositeClip); + DBG(("%s: [clipped] extents=(%d, %d), (%d, %d), delta=(%d, %d)\n", + __FUNCTION__, + 
clip.extents.x1, clip.extents.y1, + clip.extents.x2, clip.extents.y2, + dx, dy)); + + jump = _jump[(damage != NULL) | !!(dx|dy) << 1]; + + b = box; + extents = REGION_RECTS(&clip); + last_extents = extents + REGION_NUM_RECTS(&clip); + do { + int n = _n; + const xSegment *s = _s; + do { + int16_t sdx, sdy; + int16_t adx, ady; + int16_t e, e1, e2, e3; + int16_t length; + int x1, x2; + int y1, y2; + int oc1, oc2; + int octant; + + x1 = s->x1 + drawable->x; + y1 = s->y1 + drawable->y; + x2 = s->x2 + drawable->x; + y2 = s->y2 + drawable->y; + s++; + + DBG(("%s: segment (%d, %d) to (%d, %d)\n", + __FUNCTION__, x1, y1, x2, y2)); + if (x2 == x1 && y2 == y1) + continue; + + oc1 = 0; + MIOUTCODES(oc1, x1, y1, + extents->x1, + extents->y1, + extents->x2, + extents->y2); + oc2 = 0; + MIOUTCODES(oc2, x2, y2, + extents->x1, + extents->y1, + extents->x2, + extents->y2); + if (oc1 & oc2) + continue; + + CalcLineDeltas(x1, y1, x2, y2, + adx, ady, sdx, sdy, + 1, 1, octant); + + DBG(("%s: adx=(%d, %d), sdx=(%d, %d)\n", + __FUNCTION__, adx, ady, sdx, sdy)); + if (adx == 0 || ady == 0) { + if (x1 <= x2) { + b->x1 = x1; + b->x2 = x2; + } else { + b->x1 = x2; + b->x2 = x1; + } + if (y1 <= y2) { + b->y1 = y1; + b->y2 = y2; + } else { + b->y1 = y2; + b->y2 = y1; + } + b->x2++; + b->y2++; + if (box_intersect(b, extents)) { + if (++b == last_box) { + ret = &&rectangle_continue; + goto *jump; +rectangle_continue: + b = box; + } + } + } else if (adx >= ady) { + /* X-major segment */ + e1 = ady << 1; + e2 = e1 - (adx << 1); + e = e1 - adx; + length = adx; /* don't draw endpoint in main loop */ + + FIXUP_ERROR(e, octant, bias); + + if (oc1 | oc2) { + int pt1_clipped, pt2_clipped; + int x = x1, y = y1; + + if (miZeroClipLine(extents->x1, + extents->y1, + extents->x2, + extents->y2, + &x1, &y1, &x2, &y2, + adx, ady, + &pt1_clipped, &pt2_clipped, + octant, bias, oc1, oc2) == -1) + continue; + + length = abs(x2 - x1); + + /* if we've clipped the endpoint, always draw the full length + * of the 
segment, because then the capstyle doesn't matter + */ + if (pt2_clipped) + length++; + + if (pt1_clipped) { + int clipdx = abs(x1 - x); + int clipdy = abs(y1 - y); + e += clipdy * e2 + (clipdx - clipdy) * e1; + } + } + if (length == 0) + continue; + + e3 = e2 - e1; + e = e - e1; + + b->x1 = x1; + b->y2 = b->y1 = y1; + while (length--) { + e += e1; + if (e >= 0) { + b->x2 = x1; + if (b->x2 < b->x1) { + int16_t t = b->x1; + b->x1 = b->x2; + b->x2 = t; + } + b->x2++; + b->y2++; + if (++b == last_box) { + ret = &&X_continue; + goto *jump; +X_continue: + b = box; + } + y1 += sdy; + e += e3; + b->y2 = b->y1 = y1; + b->x1 = x1; + } + x1 += sdx; + } + } else { + /* Y-major segment */ + e1 = adx << 1; + e2 = e1 - (ady << 1); + e = e1 - ady; + length = ady; /* don't draw endpoint in main loop */ + + SetYMajorOctant(octant); + FIXUP_ERROR(e, octant, bias); + + if (oc1 | oc2) { + int pt1_clipped, pt2_clipped; + int x = x1, y = y1; + + if (miZeroClipLine(extents->x1, + extents->y1, + extents->x2, + extents->y2, + &x1, &y1, &x2, &y2, + adx, ady, + &pt1_clipped, &pt2_clipped, + octant, bias, oc1, oc2) == -1) + continue; + + length = abs(y2 - y1); + + /* if we've clipped the endpoint, always draw the full length + * of the segment, because then the capstyle doesn't matter + */ + if (pt2_clipped) + length++; + + if (pt1_clipped) { + int clipdx = abs(x1 - x); + int clipdy = abs(y1 - y); + e += clipdx * e2 + (clipdy - clipdx) * e1; + } + } + if (length == 0) + continue; + + e3 = e2 - e1; + e = e - e1; + + b->x2 = b->x1 = x1; + b->y1 = y1; + while (length--) { + e += e1; + if (e >= 0) { + b->y2 = y1; + if (b->y2 < b->y1) { + int16_t t = b->y1; + b->y1 = b->y2; + b->y2 = t; + } + b->x2++; + b->y2++; + if (++b == last_box) { + ret = &&Y_continue; + goto *jump; +Y_continue: + b = box; + } + x1 += sdx; + e += e3; + b->x2 = b->x1 = x1; + b->y1 = y1; + } + y1 += sdy; + } + } + } while (--n); + } while (++extents != last_extents); + + if (b != box) { + ret = &&done; + goto *jump; + } + 
+done: + fill.done(sna, &fill); + return true; + +damage: + sna_damage_add_boxes(damage, box, b-box, 0, 0); +no_damage: + fill.boxes(sna, &fill, box, b-box); + goto *ret; + +no_damage_offset: + { + BoxRec *bb = box; + do { + bb->x1 += dx; + bb->x2 += dx; + bb->y1 += dy; + bb->y2 += dy; + } while (++bb != b); + fill.boxes(sna, &fill, box, b - box); + } + goto *ret; + +damage_offset: + { + BoxRec *bb = box; + do { + bb->x1 += dx; + bb->x2 += dx; + bb->y1 += dy; + bb->y2 += dy; + } while (++bb != b); + fill.boxes(sna, &fill, box, b - box); + sna_damage_add_boxes(damage, box, b - box, 0, 0); + } + goto *ret; +} + +static unsigned sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg, BoxPtr out) { BoxRec box; int extra = gc->lineWidth; + bool clipped, can_blit; if (n == 0) - return true; + return 0; if (gc->capStyle != CapProjecting) extra >>= 1; @@ -2925,6 +3583,7 @@ sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc, box.y1 = seg->y2; } + can_blit = seg->x1 == seg->x2 || seg->y1 == seg->y2; while (--n) { seg++; if (seg->x2 > seg->x1) { @@ -2942,6 +3601,9 @@ sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc, if (seg->y2 < box.y1) box.y1 = seg->y2; if (seg->y1 > box.y2) box.y2 = seg->y1; } + + if (can_blit && !(seg->x1 == seg->x2 || seg->y1 == seg->y2)) + can_blit = false; } box.x2++; @@ -2954,9 +3616,11 @@ sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc, box.y2 += extra; } - trim_and_translate_box(&box, drawable, gc); + clipped = trim_and_translate_box(&box, drawable, gc); + if (box_empty(&box)) + return 0; *out = box; - return box_empty(&box); + return 1 | clipped << 1 | can_blit << 2; } static void @@ -2964,13 +3628,15 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg) { struct sna *sna = to_sna_from_drawable(drawable); RegionRec region; + unsigned flags; DBG(("%s(n=%d, first=((%d, %d), (%d, %d)), lineWidth=%d\n", __FUNCTION__, n, seg->x1, seg->y1, seg->x2, seg->y2, gc->lineWidth)); - if 
(sna_poly_segment_extents(drawable, gc, n, seg, ®ion.extents)) + flags = sna_poly_segment_extents(drawable, gc, n, seg, ®ion.extents); + if (flags == 0) return; DBG(("%s: extents=(%d, %d), (%d, %d)\n", __FUNCTION__, @@ -2991,17 +3657,17 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg) gc->lineStyle, gc->lineStyle == LineSolid, gc->lineWidth, gc->planemask, PM_IS_SOLID(drawable, gc->planemask), - sna_poly_segment_can_blt(n, seg))); + flags & 4)); if (gc->fillStyle == FillSolid && gc->lineStyle == LineSolid && - (gc->lineWidth == 0 || gc->lineWidth == 1) && - PM_IS_SOLID(drawable, gc->planemask) && - sna_poly_segment_can_blt(n, seg)) { + gc->lineWidth <= 1 && + PM_IS_SOLID(drawable, gc->planemask)) { struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable); DBG(("%s: trying blt solid fill [%08lx] paths\n", __FUNCTION__, gc->fgPixel)); + if (flags & 4) { if (sna_drawable_use_gpu_bo(drawable, ®ion.extents) && sna_poly_segment_blt(drawable, priv->gpu_bo, @@ -3015,6 +3681,15 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg) reduce_damage(drawable, &priv->cpu_damage, ®ion.extents), gc, n, seg, ®ion.extents)) return; + } else { + if (USE_ZERO_SPANS && + sna_drawable_use_gpu_bo(drawable, ®ion.extents) && + sna_poly_zero_segment_blt(drawable, + priv->gpu_bo, + priv->gpu_only ? NULL : reduce_damage(drawable, &priv->gpu_damage, ®ion.extents), + gc, n, seg, ®ion.extents, flags & 2)) + return; + } } /* XXX Do we really want to base this decision on the amalgam ? 
*/ diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c index 62f8cdf2..9b598c96 100644 --- a/src/sna/sna_blt.c +++ b/src/sna/sna_blt.c @@ -1297,6 +1297,66 @@ fastcall static void sna_blt_fill_op_box(struct sna *sna, *(uint64_t *)(b+1) = *(uint64_t *)box; } +fastcall static void sna_blt_fill_op_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + struct kgem *kgem = &sna->kgem; + uint32_t cmd = op->base.u.blt.cmd; + + DBG(("%s: %08x x %d\n", __FUNCTION__, + op->base.u.blt.pixel, nbox)); + + if (!kgem_check_batch(kgem, 3)) + sna_blt_fill_begin(sna, &op->base.u.blt); + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + int nbox_this_time; + + nbox_this_time = nbox; + if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3; + assert(nbox_this_time); + nbox -= nbox_this_time; + + kgem->nbatch += 3 * nbox_this_time; + while (nbox_this_time >= 8) { + b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++; + b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++; + b[6] = cmd; *(uint64_t *)(b+7) = *(uint64_t *)box++; + b[9] = cmd; *(uint64_t *)(b+10) = *(uint64_t *)box++; + b[12] = cmd; *(uint64_t *)(b+13) = *(uint64_t *)box++; + b[15] = cmd; *(uint64_t *)(b+16) = *(uint64_t *)box++; + b[18] = cmd; *(uint64_t *)(b+19) = *(uint64_t *)box++; + b[21] = cmd; *(uint64_t *)(b+22) = *(uint64_t *)box++; + b += 24; + nbox_this_time -= 8; + } + if (nbox_this_time & 4) { + b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++; + b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++; + b[6] = cmd; *(uint64_t *)(b+7) = *(uint64_t *)box++; + b[9] = cmd; *(uint64_t *)(b+10) = *(uint64_t *)box++; + b += 12; + } + if (nbox_this_time & 2) { + b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++; + b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++; + b += 6; + } + if (nbox_this_time & 1) { + b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++; + } + + if (!nbox) + 
return; + + sna_blt_fill_begin(sna, &op->base.u.blt); + } while (1); +} + static void sna_blt_fill_op_done(struct sna *sna, const struct sna_fill_op *fill) { @@ -1324,9 +1384,10 @@ bool sna_blt_fill(struct sna *sna, uint8_t alu, bo, bpp, alu, pixel)) return FALSE; - fill->blt = sna_blt_fill_op_blt; - fill->box = sna_blt_fill_op_box; - fill->done = sna_blt_fill_op_done; + fill->blt = sna_blt_fill_op_blt; + fill->box = sna_blt_fill_op_box; + fill->boxes = sna_blt_fill_op_boxes; + fill->done = sna_blt_fill_op_done; return TRUE; } diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index d30c0b67..6c187915 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -160,6 +160,10 @@ struct sna_fill_op { fastcall void (*box)(struct sna *sna, const struct sna_fill_op *op, const BoxRec *box); + fastcall void (*boxes)(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int count); void (*done)(struct sna *sna, const struct sna_fill_op *op); }; |