summaryrefslogtreecommitdiff
path: root/xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s
diff options
context:
space:
mode:
Diffstat (limited to 'xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s')
-rw-r--r--xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s429
1 files changed, 429 insertions, 0 deletions
diff --git a/xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s b/xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s
new file mode 100644
index 000000000..5a0ef505e
--- /dev/null
+++ b/xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s
@@ -0,0 +1,429 @@
+/* $XConsortium: ibm8514imfl.s,v 1.2 94/03/29 09:37:34 dpw Exp $ */
+/******************************************************************************
+
+This is an assembly language version of the ibm8514ImageFill routine.
+It is renamed to enable a filter routine to catch the simplest
+cases and dispatch them to the more efficient ibm8514Imagewrite.
+
+Written by Hans Nasten ( nasten@everyware.se ) AUG 29, 1993.
+
+
+The equivalent C-code looks like this.
+
+void
+ibm8514RealImageFill(x, y, w, h, psrc, pwidth, pw, ph, pox, poy, alu, planemask)
+ int x;
+ int y;
+ int w;
+ int h;
+ unsigned char *psrc;
+ int pwidth;
+ int pw;
+ int ph;
+ int pox;
+ int poy;
+ short alu;
+ short planemask;
+{
+ int srcxsave, srcx, srcy, dstw, srcw, srch;
+ int wtemp, count, i, j;
+ unsigned short btemp, *p;
+
+
+ WaitQueue(2);
+ outw(FRGD_MIX, FSS_PCDATA | alu);
+ outw(WRT_MASK, planemask);
+ WaitQueue(7);
+ outw(CUR_X, (short)x);
+ outw(CUR_Y, (short)y);
+ if (w&1)
+ outw(MAJ_AXIS_PCNT, (short)w);
+ else
+ outw(MAJ_AXIS_PCNT, (short)w-1);
+ outw(MULTIFUNC_CNTL, MIN_AXIS_PCNT | h-1);
+ outw(MULTIFUNC_CNTL, SCISSORS_L | x);
+ outw(MULTIFUNC_CNTL, SCISSORS_R | (x+w-1));
+ outw(CMD, CMD_RECT | INC_Y | INC_X | DRAW |
+ PCDATA | WRTDATA | _16BIT | BYTSEQ);
+ WaitQueue(8);
+
+ modulus(y-poy,ph,srcy);
+ while( h > 0 ) {
+ srch = ( srcy+h > ph ? ph - srcy : h );
+ modulus(x-pox,pw,srcxsave);
+ for( i = 0; i < srch; i++ ) {
+ dstw = w;
+ srcx = srcxsave;
+ srcw = ( srcx+w > pw ? pw - srcx : w );
+ wtemp = 0;
+ while( dstw > 0 ) {
+ p = (unsigned short *)((unsigned char *)(psrc + pwidth * srcy + srcx));
+ if( wtemp & 1 ) {
+ outw( PIX_TRANS, (btemp & 0x00ff) | (*p << 8 ) );
+ p = (unsigned short *)((unsigned char *)p + 1);
+ wtemp = srcw - 1;
+ }
+ else
+ wtemp = srcw;
+
+ count = wtemp / 2;
+ for( j = 0; j < count; j++ )
+ outw( PIX_TRANS, *p++ );
+
+ dstw -= srcw;
+ srcx = 0;
+ if( wtemp & 1 ) {
+ if( dstw != 0 ) {
+ btemp = *p;
+ }
+ else
+ outw( PIX_TRANS, *p );
+ }
+ srcw = ( dstw < pw ? dstw : pw );
+ }
+ srcy++;
+ h--;
+ }
+ srcy = 0;
+ }
+ WaitQueue(3);
+ outw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC);
+ outw(MULTIFUNC_CNTL, SCISSORS_L);
+ outw(MULTIFUNC_CNTL, SCISSORS_R | 1023);
+}
+
+******************************************************************************/
+
+#define _8514_ASM_
+#include "assyntax.h"
+#include "reg8514.h"
+
+ AS_BEGIN
+
+/*
+ * Defines for in arguments.
+ * The twelve arguments are addressed relative to EBP in 4-byte steps
+ * starting at offset 8, in the order of the parameter list of
+ * ibm8514RealImageFill shown in the C version above.
+ * alu and planemask are declared short there; the routine reads them
+ * with 16 bit moves.
+ */
+#define x_arg REGOFF(8,EBP)
+#define y_arg REGOFF(12,EBP)
+#define w_arg REGOFF(16,EBP)
+#define h_arg REGOFF(20,EBP)
+#define psrc_arg REGOFF(24,EBP)
+#define pwidth_arg REGOFF(28,EBP)
+#define pw_arg REGOFF(32,EBP)
+#define ph_arg REGOFF(36,EBP)
+#define pox_arg REGOFF(40,EBP)
+#define poy_arg REGOFF(44,EBP)
+#define alu_arg REGOFF(48,EBP)
+#define planemask_arg REGOFF(52,EBP)
+
+/*
+ * Defines for local variables.
+ * Four 4-byte slots at negative offsets from EBP; the prologue of the
+ * routine reserves the 16 bytes for them.
+ *   srcy_loc     - current line within the pixmap.
+ *   srch_loc     - lines left in the current vertical pass over the pixmap.
+ *   srcx_loc     - pointer to the start of the current pixmap line
+ *                  ( a pointer, despite the name ).
+ *   srcxsave_loc - x offset into the pixmap used for the first lap
+ *                  on every line.
+ */
+#define srcy_loc REGOFF(-4,EBP)
+#define srch_loc REGOFF(-8,EBP)
+#define srcx_loc REGOFF(-12,EBP)
+#define srcxsave_loc REGOFF(-16,EBP)
+
+
+ SEG_TEXT
+ ALIGNTEXT4
+
+GLOBL GLNAME(ibm8514RealImageFill)
+
+GLNAME(ibm8514RealImageFill):
+ PUSH_L (EBP)
+ MOV_L (ESP,EBP)
+ SUB_L (CONST(16),ESP) /* room for the four 4-byte locals */
+ PUSH_L (EDI)
+ PUSH_L (ESI)
+ PUSH_L (EBX)
+/*
+ * Check if height or width is 0.
+ * In that case the routine returns at once without touching the 8514.
+ * NOTE(review): only an exact zero is rejected (JZ); negative values
+ * fall through - presumably the calling filter routine never passes
+ * them, confirm.
+ */
+ MOV_L (w_arg,EDI)
+ MOV_L (h_arg,EBX)
+ OR_L (EDI,EDI)
+ JZ (.finish)
+ OR_L (EBX,EBX)
+ JZ (.finish)
+/*
+ * Wait for 2 queue entries
+ * ( poll GP_STAT until bit 0x40 of the byte read is clear ).
+ */
+ MOV_L (GP_STAT,EDX)
+.wait_queue_0:
+ IN_B
+ TEST_B (CONST(0x40),AL)
+ JNZ (.wait_queue_0)
+/*
+ * Init 8514 registers.
+ * FRGD_MIX = FSS_PCDATA | alu, WRT_MASK = planemask.
+ */
+ MOV_L (FRGD_MIX,EDX)
+ MOV_W (alu_arg,AX)
+ OR_W (FSS_PCDATA,AX)
+ OUT_W
+
+ MOV_L (WRT_MASK,EDX)
+ MOV_W (planemask_arg,AX)
+ OUT_W
+/*
+ * Wait for 7 queue entries
+ * ( poll GP_STAT until bit 0x02 is clear ), then set the start
+ * position CUR_X = x, CUR_Y = y.
+ */
+ MOV_L (GP_STAT,EDX)
+.wait_queue_1:
+ IN_B
+ TEST_B (CONST(0x02),AL)
+ JNZ (.wait_queue_1)
+
+ MOV_L (CUR_X,EDX)
+ MOV_W (x_arg,AX)
+ OUT_W
+
+ MOV_L (CUR_Y,EDX)
+ MOV_W (y_arg,AX)
+ OUT_W
+/*
+ * If the width is odd, program the 8514 registers for width+1.
+ * MAJ_AXIS_PCNT is written with w for an odd w and with w-1 for an
+ * even w ( presumably the register holds the pixel count minus one,
+ * so the engine always expects an even number of pixels - confirm ).
+ */
+ MOV_L (MAJ_AXIS_PCNT,EDX)
+ MOV_W (w_arg,AX)
+ TEST_W (CONST(1),AX)
+ JNZ (.odd)
+
+ DEC_W (AX)
+.odd:
+ OUT_W
+
+.cont1:
+/*
+ * Set height and scissors registers.
+ * The scissors is used to clip the last unwanted pixel on
+ * lines with an odd length.
+ * All three values are written to MULTIFUNC_CNTL: h-1, SCISSORS_L | x
+ * and SCISSORS_R | (x+w-1). The OR with MIN_AXIS_PCNT is commented
+ * out below, so h-1 is written as is.
+ * ( the .cont1 label above is not the target of any jump in this routine ).
+ */
+ MOV_L (MULTIFUNC_CNTL,EDX)
+ MOV_W (h_arg,AX)
+ DEC_W (AX)
+/* OR_W (MIN_AXIS_PCNT,AX)*/
+ OUT_W
+ MOV_W (x_arg,AX)
+ OR_W (SCISSORS_L,AX)
+ OUT_W
+ MOV_W (x_arg,AX)
+ ADD_W (w_arg,AX)
+ DEC_W (AX)
+ OR_W (SCISSORS_R,AX)
+ OUT_W
+/*
+ * Give command to 8514.
+ * The command is : CMD_RECT | INC_Y | INC_X | DRAW
+ * | PCDATA | WRTDATA | _16BIT | BYTSEQ);
+ * ( GP_WRITE_CMD is presumably that combination, defined in
+ * reg8514.h - confirm ).
+ */
+ MOV_L (CMD,EDX)
+ MOV_W (GP_WRITE_CMD,AX)
+ OUT_W
+/*
+ * Do the modulo trick for the y coordinate.
+ * This is stolen from the gcc 2.4.5 output.
+ * srcy = remainder of (y - poy) / ph; when the remainder is negative
+ * ph is added to it. The result is the first pixmap line to use.
+ * NOTE(review): ph is not checked for zero before the IDIV.
+ */
+ MOV_L (y_arg,EAX)
+ SUB_L (poy_arg,EAX)
+ CDQ
+ IDIV_L (ph_arg)
+ TEST_L (EDX,EDX)
+ JGE (.mod1)
+ ADD_L (ph_arg,EDX)
+.mod1:
+ MOV_L (EDX,srcy_loc)
+/*
+ * Wait until the fifo is empty
+ * ( poll GP_STAT until bit 0x01 is clear ).
+ */
+ MOV_L (GP_STAT,EDX)
+.wait_queue_2:
+ IN_B
+ TEST_B (CONST(1),AL)
+ JNZ (.wait_queue_2)
+
+ CLD /* the OUTS_W below must step ESI upwards */
+/*
+ * Process all lines on screen repeating the pixmap if needed.
+ * This loop paints from the present y location to the end of
+ * the pixmap. ( or to the end of the screen area if the pixmap
+ * is nonrepeating ).
+ * Start by checking if the pixmap is high enough to completely
+ * cover the screen area vertically.
+ * srch = h when srcy + h <= ph, otherwise ph - srcy. It is the number
+ * of lines painted in this pass and is subtracted from h_arg right
+ * away, so the stack copy of h always holds the lines still to come
+ * after this pass.
+ */
+.next_pixmap_vertical:
+ MOV_L (srcy_loc,EAX)
+ ADD_L (h_arg,EAX)
+ MOV_L (ph_arg,EBX)
+ CMP_L (EBX,EAX)
+ JLE (.cmp_get_h)
+
+ SUB_L (srcy_loc,EBX)
+ JMP (.cmp2)
+.cmp_get_h:
+ MOV_L (h_arg,EBX)
+.cmp2:
+ MOV_L (EBX,srch_loc)
+ SUB_L (EBX,h_arg)
+/*
+ * Do the modulo trick for the x coordinate.
+ * srcxsave = remainder of (x - pox) / pw, with pw added when the
+ * remainder is negative. This is redone on every vertical pass.
+ * NOTE(review): pw is not checked for zero before the IDIV.
+ */
+ MOV_L (x_arg,EAX)
+ SUB_L (pox_arg,EAX)
+ CDQ
+ IDIV_L (pw_arg)
+ TEST_L (EDX,EDX)
+ JGE (.mod2)
+ ADD_L (pw_arg,EDX)
+.mod2:
+ MOV_L (EDX,srcxsave_loc)
+/*
+ * Process one line of pixels in the pixmap,
+ * repeating the pixmap horizontally if needed.
+ * Start by checking if the pixmap is wide enough to completely
+ * cover the screen area horizontally.
+ * Register use from here to the end of the line:
+ *   EBX = pixels still to paint on this screen line ( starts as w ).
+ *   EDI = pixels taken from the pixmap in the current lap; for the
+ *         first lap w when srcxsave + w <= pw, otherwise pw - srcxsave.
+ *   ESI = source pointer into the pixmap.
+ *   ECX = pixel count of the lap; CH doubles as storage for a
+ *         leftover pixel between laps.
+ */
+.next_pixmap_line:
+ MOV_L (w_arg,EBX)
+ MOV_L (srcxsave_loc,EAX)
+ ADD_L (EBX,EAX)
+ MOV_L (pw_arg,EDI)
+ CMP_L (EDI,EAX)
+ JLE (.cmp_get_w)
+
+ SUB_L (srcxsave_loc,EDI)
+ JMP (.cmp4)
+
+.cmp_get_w:
+ MOV_L (EBX,EDI)
+.cmp4:
+ SUB_L (ECX,ECX)
+/*
+ * Calculate and save a pointer to the first pixel on this line in
+ * the pixmap. Add the x offset for the first lap and then skip the
+ * first part of the loop.
+ * ( all this just to avoid one multiplication and two add's in the loop ).
+ * srcx_loc = psrc + pwidth * srcy ( start of the pixmap line ),
+ * ESI = srcx_loc + srcxsave. The MUL_L also overwrites EDX, which is
+ * loaded with PIX_TRANS again at .previously_even.
+ */
+ MOV_L (pwidth_arg,EAX)
+ MUL_L (srcy_loc)
+ ADD_L (psrc_arg,EAX)
+ MOV_L (EAX,srcx_loc)
+ MOV_L (srcxsave_loc,ESI)
+ ADD_L (EAX,ESI)
+ JMP (.previously_even)
+/*
+ * Copy one line of the pixmap to the screen.
+ * This loop paints one line of pixels from the pixmap onto the screen.
+ * Entered here for the second and later laps on a line, so the source
+ * pointer restarts at the beginning of the pixmap line.
+ */
+.next_pixmap_horizontal:
+ MOV_L (srcx_loc,ESI)
+/*
+ * Is there a pixel leftover from the previous lap in the loop ?
+ * ( since CH is used to store the saved pixel, only CL is in scope ).
+ */
+ TEST_B (CONST(1),CL)
+ JZ (.previously_even)
+/*
+ * Yes there was, combine it with the first pixel and write them
+ * to the 8514 engine.
+ * AL = saved pixel, AH = first pixel of this lap. One source pixel is
+ * consumed, so ESI is advanced by one and the count for the block
+ * move below is EDI - 1.
+ */
+ MOV_B (CH,AL)
+ MOV_B (REGIND(ESI),AH)
+ MOV_L (PIX_TRANS,EDX)
+ OUT_W
+ INC_L (ESI)
+ MOV_L (EDI,ECX)
+ DEC_L (ECX)
+ JMP (.move_pixels)
+
+.previously_even:
+ MOV_L (EDI,ECX)
+ MOV_L (PIX_TRANS,EDX)
+/*
+ * Move the rest of the line using a "rep outsw" instruction.
+ * ECX pixels are sent as ECX/2 words. The full pixel count is parked
+ * in EAX and put back in ECX afterwards, so that its low bit tells
+ * whether a single pixel is left over. EBX ( remaining width ) is
+ * reduced by the width of this lap.
+ */
+.move_pixels:
+ MOV_L (ECX,EAX)
+ SHR_L (CONST(1),ECX)
+ REP
+ OUTS_W
+ MOV_L (EAX,ECX)
+ SUB_L (EDI,EBX)
+/*
+ * Is there a pixel left unwritten ?
+ */
+ TEST_W (CONST(1),CX)
+ JZ (.all_written)
+/*
+ * Yes, check if this is the last ( or only ) repetition of the pixmap.
+ */
+ AND_L (EBX,EBX)
+ JZ (.write_pixel)
+/*
+ * No, there is more to come. Save this pixel for later.
+ * ( it goes into CH; CL keeps the odd low bit that is tested at the
+ * top of the next lap ).
+ */
+ MOV_B (REGIND(ESI),CH)
+ JMP (.all_written)
+/*
+ * Yes, this is the last pixel displayed on this line.
+ * Write it and let the scissors cut the unwanted extra pixel.
+ * Only AL is loaded; AH still holds bits 8-15 of the pixel count
+ * copied to EAX at .move_pixels.
+ */
+.write_pixel:
+ MOV_B (REGIND(ESI),AL)
+ OUT_W
+/*
+ * Check if the next repetition of the pixmap is using the entire
+ * pixmap width.
+ * EDI = pw when more than pw pixels remain, otherwise the remaining
+ * width in EBX. Another lap is run as long as EBX is not zero.
+ */
+.all_written:
+ MOV_L (pw_arg,EAX)
+ CMP_L (EAX,EBX)
+ JG (.cmp6)
+
+ MOV_L (EBX,EAX)
+.cmp6:
+ MOV_L (EAX,EDI)
+ AND_L (EBX,EBX)
+ JNZ (.next_pixmap_horizontal)
+
+ INC_L (srcy_loc) /* next line of the pixmap */
+ DEC_L (srch_loc) /* one line less in this vertical pass */
+ JNZ (.next_pixmap_line)
+
+ SUB_L (EDX,EDX) /* end of this pass: restart at pixmap line 0 */
+ MOV_L (EDX,srcy_loc)
+ CMP_L (EDX,h_arg) /* any screen lines left to paint ? */
+ JNZ (.next_pixmap_vertical)
+
+/*
+ * Wait until room for 3 entries in the fifo
+ * ( poll GP_STAT until bit 0x20 is clear ).
+ */
+ MOV_L (GP_STAT,EDX)
+.wait_queue_3:
+ IN_B
+ TEST_B (CONST(0x20),AL)
+ JNZ (.wait_queue_3)
+/*
+ * Reset FRGD_MIX to default.
+ * ( GP_DEF_FRGD_MIX is presumably FSS_FRGDCOL | MIX_SRC as in the
+ * C version at the top of this file - confirm against reg8514.h ).
+ */
+ MOV_L (FRGD_MIX,EDX)
+ MOV_W (GP_DEF_FRGD_MIX,AX)
+ OUT_W
+
+/*
+ * Reset the scissors registers.
+ * SCISSORS_L is written with no coordinate bits set, SCISSORS_R
+ * with 1023.
+ */
+ MOV_L (MULTIFUNC_CNTL,EDX)
+ MOV_W (SCISSORS_L,AX)
+ OUT_W
+ MOV_W (SCISSORS_R,AX)
+ OR_W (CONST(1023),AX)
+ OUT_W
+
+.finish:
+ POP_L (EBX) /* undo the prologue in reverse order */
+ POP_L (ESI)
+ POP_L (EDI)
+ ADD_L (CONST(16),ESP)
+ POP_L (EBP)
+ RET