/*
 * Accelerated rootless blit
 */
/*
 * This code is largely copied from fbBlt.c.
 *
 * Copyright © 1998 Keith Packard
 * Copyright (c) 2002 Apple Computer, Inc. All Rights Reserved.
 * Copyright (c) 2003 Torrey T. Lyons. All Rights Reserved.
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Keith Packard not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  Keith Packard makes no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */
/* $XFree86: xc/programs/Xserver/fb/fbblt.c,v 1.8 2000/09/28 00:47:22 keithp Exp $ */

#include "fb.h"
#include "rootlessCommon.h"
#include "rlAccel.h"

/*
 * rlBlt -- copy a rectangle of bits from one FbBits buffer to another,
 * handing large aligned copies to the rootless implementation hooks.
 *
 *   srcLine, dstLine      first FbBits word of the first source/destination row
 *   srcStride, dstStride  distance between rows, counted in FbBits words
 *   srcX, dstX, width     horizontal offsets and extent; the code converts
 *                         them to bytes with ">> 3" and to words with
 *                         ">> FB_SHIFT", i.e. they are expressed in bits
 *   pDstScreen            screen whose SCREENREC()->imp hooks may be used
 *   height                number of rows
 *   alu, pm               raster operation and plane mask for the merge rop
 *   bpp                   bits per pixel (24 is diverted to fbBlt24 when
 *                         FB_24BIT is defined and pm fails FbCheck24Pix)
 *   reverse               walk each row from its last word to its first
 *   upsidedown            walk rows from the last to the first
 *
 * The accelerated paths are only attempted when the destination has no
 * partial leading/trailing word, alu is GXcopy, and the amount of data
 * exceeds rootless_CopyBytes_threshold.  Everything else falls through to
 * the generic word-by-word loops.
 */
void
rlBlt (FbBits    *srcLine,
       FbStride  srcStride,
       int       srcX,

       ScreenPtr pDstScreen,
       FbBits    *dstLine,
       FbStride  dstStride,
       int       dstX,

       int       width,
       int       height,

       int       alu,
       FbBits    pm,
       int       bpp,

       Bool      reverse,
       Bool      upsidedown)
{
    FbBits  *src, *dst;
    int     leftShift, rightShift;
    FbBits  startmask, endmask;
    FbBits  bits, bits1;
    int     n, nmiddle;
    Bool    destInvarient;
    int     startbyte, endbyte;
    FbDeclareMergeRop ();

#ifdef FB_24BIT
    if (bpp == 24 && !FbCheck24Pix (pm))
    {
        fbBlt24 (srcLine, srcStride, srcX, dstLine, dstStride, dstX,
                 width, height, alu, pm, reverse, upsidedown);
        return;
    }
#endif
    FbInitializeMergeRop(alu, pm);
    destInvarient = FbDestInvarientMergeRop();
    if (upsidedown)
    {
        /* Start at the last row and step backwards through both buffers. */
        srcLine += (height - 1) * (srcStride);
        dstLine += (height - 1) * (dstStride);
        srcStride = -srcStride;
        dstStride = -dstStride;
    }
    FbMaskBitsBytes (dstX, width, destInvarient, startmask, startbyte,
                     nmiddle, endmask, endbyte);

    /*
     * Beginning of the rootless acceleration code
     *
     * NOTE(review): when upsidedown is set the strides are already negated
     * at this point, so the row-byte values handed to the hooks below are
     * derived from negative strides -- confirm the hooks cope with that.
     */
    if (!startmask && !endmask && alu == GXcopy &&
        height * nmiddle * sizeof(*dst) > rootless_CopyBytes_threshold)
    {
        if (pm == FB_ALLONES && SCREENREC(pDstScreen)->imp->CopyBytes)
        {
            SCREENREC(pDstScreen)->imp->CopyBytes(
                            nmiddle * sizeof(*dst), height,
                            (char *) srcLine + (srcX >> 3),
                            srcStride * sizeof (*src),
                            (char *) dstLine + (dstX >> 3),
                            dstStride * sizeof (*dst));
            return;
        }

        /* FIXME: the pm test here isn't super-wonderful - just because
           we don't care about the top eight bits doesn't necessarily
           mean we want them set to 255. But doing this does give a
           factor of two performance improvement when copying from a
           pixmap to a window, which is pretty common.. */

        else if (bpp == 32 && sizeof(FbBits) == 4
                 && pm == 0x00FFFFFFUL && !reverse
                 && SCREENREC(pDstScreen)->imp->CompositePixels)
        {
            /* need to copy XRGB to ARGB. */

            /*
             * These arrays deliberately do not reuse the names of the
             * function-level FbBits pointers: an inner "src" array would
             * make sizeof(*src) yield sizeof(void *) and give the wrong
             * source row pitch wherever pointers are wider than FbBits.
             */
            void *srcPlanes[2], *dest[2];
            unsigned int src_rowbytes[2], dest_rowbytes[2];
            unsigned int fn;

            srcPlanes[0] = (char *) srcLine + (srcX >> 3);
            srcPlanes[1] = NULL;
            src_rowbytes[0] = srcStride * sizeof(FbBits);
            src_rowbytes[1] = 0;

            dest[0] = (char *) dstLine + (dstX >> 3);
            dest[1] = dest[0];
            dest_rowbytes[0] = dstStride * sizeof(*dst);
            dest_rowbytes[1] = dest_rowbytes[0];

            fn = RL_COMPOSITE_FUNCTION(RL_COMPOSITE_SRC, RL_DEPTH_ARGB8888,
                                       RL_DEPTH_NIL, RL_DEPTH_ARGB8888);

            /* On any result other than Success, fall back to the
               software loops below. */
            if (SCREENREC(pDstScreen)->imp->CompositePixels(
                                nmiddle, height,
                                fn, srcPlanes, src_rowbytes,
                                NULL, 0, dest, dest_rowbytes) == Success)
            {
                return;
            }
        }
    }
    /* End of the rootless acceleration code */

    if (reverse)
    {
        /* Point one word past the last word touched; the loops pre-decrement. */
        srcLine += ((srcX + width - 1) >> FB_SHIFT) + 1;
        dstLine += ((dstX + width - 1) >> FB_SHIFT) + 1;
        srcX = (srcX + width - 1) & FB_MASK;
        dstX = (dstX + width - 1) & FB_MASK;
    }
    else
    {
        srcLine += srcX >> FB_SHIFT;
        dstLine += dstX >> FB_SHIFT;
        srcX &= FB_MASK;
        dstX &= FB_MASK;
    }
    if (srcX == dstX)
    {
        /* Source and destination share the same intra-word offset:
           words can be merged without shifting. */
        while (height--)
        {
            src = srcLine;
            srcLine += srcStride;
            dst = dstLine;
            dstLine += dstStride;
            if (reverse)
            {
                if (endmask)
                {
                    bits = *--src;
                    --dst;
                    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
                }
                n = nmiddle;
                if (destInvarient)
                {
                    while (n--)
                        *--dst = FbDoDestInvarientMergeRop(*--src);
                }
                else
                {
                    while (n--)
                    {
                        bits = *--src;
                        --dst;
                        *dst = FbDoMergeRop (bits, *dst);
                    }
                }
                if (startmask)
                {
                    bits = *--src;
                    --dst;
                    FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
                }
            }
            else
            {
                if (startmask)
                {
                    bits = *src++;
                    FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
                    dst++;
                }
                n = nmiddle;
                if (destInvarient)
                {
#if 0
                    /*
                     * This provides some speedup on screen->screen blts
                     * over the PCI bus, usually about 10%.  But fb
                     * isn't usually used for this operation...
                     */
                    if (_ca2 + 1 == 0 && _cx2 == 0)
                    {
                        FbBits t1, t2, t3, t4;
                        while (n >= 4)
                        {
                            t1 = *src++;
                            t2 = *src++;
                            t3 = *src++;
                            t4 = *src++;
                            *dst++ = t1;
                            *dst++ = t2;
                            *dst++ = t3;
                            *dst++ = t4;
                            n -= 4;
                        }
                    }
#endif
                    while (n--)
                        *dst++ = FbDoDestInvarientMergeRop(*src++);
                }
                else
                {
                    while (n--)
                    {
                        bits = *src++;
                        *dst = FbDoMergeRop (bits, *dst);
                        dst++;
                    }
                }
                if (endmask)
                {
                    bits = *src;
                    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
                }
            }
        }
    }
    else
    {
        /* Intra-word offsets differ: every destination word is assembled
           from two neighbouring source words.  The two shift amounts
           always add up to FB_UNIT. */
        if (srcX > dstX)
        {
            leftShift = srcX - dstX;
            rightShift = FB_UNIT - leftShift;
        }
        else
        {
            rightShift = dstX - srcX;
            leftShift = FB_UNIT - rightShift;
        }
        while (height--)
        {
            src = srcLine;
            srcLine += srcStride;
            dst = dstLine;
            dstLine += dstStride;

            bits1 = 0;
            if (reverse)
            {
                if (srcX < dstX)
                    bits1 = *--src;
                if (endmask)
                {
                    bits = FbScrRight(bits1, rightShift);
                    /* Only fetch another source word if the masked
                       destination bits actually need it. */
                    if (FbScrRight(endmask, leftShift))
                    {
                        bits1 = *--src;
                        bits |= FbScrLeft(bits1, leftShift);
                    }
                    --dst;
                    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
                }
                n = nmiddle;
                if (destInvarient)
                {
                    while (n--)
                    {
                        bits = FbScrRight(bits1, rightShift);
                        bits1 = *--src;
                        bits |= FbScrLeft(bits1, leftShift);
                        --dst;
                        *dst = FbDoDestInvarientMergeRop(bits);
                    }
                }
                else
                {
                    while (n--)
                    {
                        bits = FbScrRight(bits1, rightShift);
                        bits1 = *--src;
                        bits |= FbScrLeft(bits1, leftShift);
                        --dst;
                        *dst = FbDoMergeRop(bits, *dst);
                    }
                }
                if (startmask)
                {
                    bits = FbScrRight(bits1, rightShift);
                    if (FbScrRight(startmask, leftShift))
                    {
                        bits1 = *--src;
                        bits |= FbScrLeft(bits1, leftShift);
                    }
                    --dst;
                    FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
                }
            }
            else
            {
                if (srcX > dstX)
                    bits1 = *src++;
                if (startmask)
                {
                    bits = FbScrLeft(bits1, leftShift);
                    bits1 = *src++;
                    bits |= FbScrRight(bits1, rightShift);
                    FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
                    dst++;
                }
                n = nmiddle;
                if (destInvarient)
                {
                    while (n--)
                    {
                        bits = FbScrLeft(bits1, leftShift);
                        bits1 = *src++;
                        bits |= FbScrRight(bits1, rightShift);
                        *dst = FbDoDestInvarientMergeRop(bits);
                        dst++;
                    }
                }
                else
                {
                    while (n--)
                    {
                        bits = FbScrLeft(bits1, leftShift);
                        bits1 = *src++;
                        bits |= FbScrRight(bits1, rightShift);
                        *dst = FbDoMergeRop(bits, *dst);
                        dst++;
                    }
                }
                if (endmask)
                {
                    bits = FbScrLeft(bits1, leftShift);
                    /* Only fetch another source word if the masked
                       destination bits actually need it. */
                    if (FbScrLeft(endmask, rightShift))
                    {
                        bits1 = *src;
                        bits |= FbScrRight(bits1, rightShift);
                    }
                    FbDoRightMaskByteMergeRop (dst, bits, endbyte, endmask);
                }
            }
        }
    }
}