summary | refs | log | tree | commit | diff
path: root/exa
diff options
context:
space:
mode:
author: Michel Dänzer <michel@tungstengraphics.com> 2008-07-30 18:30:37 +0200
committer: Michel Dänzer <michel@tungstengraphics.com> 2008-07-30 18:30:37 +0200
commit: a3afa6f2fb80489f7b6a88d12def09281d32ed94 (patch)
tree: 36759427ed13c5ea96f46d511c74913b2e70d212 /exa
parent: 37087bc10630ee7740df1369b3e56a44fd2ad2b0 (diff)
EXA: Optimize GXcopy tiled fills.
Diffstat (limited to 'exa')
-rw-r--r--  exa/exa_accel.c  60
1 files changed, 53 insertions, 7 deletions
diff --git a/exa/exa_accel.c b/exa/exa_accel.c
index 8bcc2ce34..cf15709a3 100644
--- a/exa/exa_accel.c
+++ b/exa/exa_accel.c
@@ -1211,6 +1211,7 @@ exaFillRegionTiled (DrawablePtr pDrawable,
int nbox = REGION_NUM_RECTS (pRegion);
BoxPtr pBox = REGION_RECTS (pRegion);
Bool ret = FALSE;
+ int i;
tileWidth = pTile->drawable.width;
tileHeight = pTile->drawable.height;
@@ -1252,20 +1253,26 @@ exaFillRegionTiled (DrawablePtr pDrawable,
if ((*pExaScr->info->PrepareCopy) (pTile, pPixmap, 1, 1, alu, planemask))
{
- while (nbox--)
+ for (i = 0; i < nbox; i++)
{
- int height = pBox->y2 - pBox->y1;
- int dstY = pBox->y1;
+ int height = pBox[i].y2 - pBox[i].y1;
+ int dstY = pBox[i].y1;
int tileY;
+ if (alu == GXcopy)
+ height = min(height, tileHeight);
+
modulus(dstY - yoff - pDrawable->y - pPatOrg->y, tileHeight, tileY);
while (height > 0) {
- int width = pBox->x2 - pBox->x1;
- int dstX = pBox->x1;
+ int width = pBox[i].x2 - pBox[i].x1;
+ int dstX = pBox[i].x1;
int tileX;
int h = tileHeight - tileY;
+ if (alu == GXcopy)
+ width = min(width, tileWidth);
+
if (h > height)
h = height;
height -= h;
@@ -1287,12 +1294,51 @@ exaFillRegionTiled (DrawablePtr pDrawable,
dstY += h;
tileY = 0;
}
- pBox++;
}
(*pExaScr->info->DoneCopy) (pPixmap);
+
exaMarkSync(pDrawable->pScreen);
- ret = TRUE;
+ /* With GXcopy, we only need to do the basic algorithm up to the tile
+ * size; then, we can just keep doubling the destination in each
+ * direction until it fills the box. This way, the number of copy
+ * operations is O(log(rx)) + O(log(ry)) instead of O(rx * ry), where
+ * rx/ry is the ratio between box and tile width/height. This can make
+ * a big difference if each driver copy incurs a significant constant
+ * overhead.
+ */
+ if (alu != GXcopy)
+ ret = TRUE;
+ else if ((*pExaScr->info->PrepareCopy) (pPixmap, pPixmap, 1, 1, alu,
+ planemask)) {
+ for (i = 0; i < nbox; i++)
+ {
+ int width = min(pBox[i].x2 - pBox[i].x1, tileWidth);
+ int height = min(pBox[i].y2 - pBox[i].y1, tileHeight);
+ int dstX = pBox[i].x1 + width;
+ int dstY = pBox[i].y1 + height;
+
+ while (dstX < pBox[i].x2) {
+ (*pExaScr->info->Copy) (pPixmap, pBox[i].x1, pBox[i].y1,
+ dstX, pBox[i].y1, width, height);
+ dstX += width;
+ width = min(pBox[i].x2 - dstX, width * 2);
+ }
+
+ width = pBox[i].x2 - pBox[i].x1;
+
+ while (dstY < pBox[i].y2) {
+ (*pExaScr->info->Copy) (pPixmap, pBox[i].x1, pBox[i].y1,
+ pBox[i].x1, dstY, width, height);
+ dstY += height;
+ height = min(pBox[i].y2 - dstY, height * 2);
+ }
+ }
+
+ (*pExaScr->info->DoneCopy) (pPixmap);
+
+ ret = TRUE;
+ }
}
out: