diff options
Diffstat (limited to 'xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s')
-rw-r--r-- | xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s | 429 |
1 files changed, 429 insertions, 0 deletions
diff --git a/xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s b/xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s new file mode 100644 index 000000000..5a0ef505e --- /dev/null +++ b/xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s @@ -0,0 +1,429 @@ +/* $XConsortium: ibm8514imfl.s,v 1.2 94/03/29 09:37:34 dpw Exp $ */ +/****************************************************************************** + +This is a assembly language version of the ibm8514ImageFill routine. +It is renamed to enable a filter routine to catch the simplest +cases and dispatch them to the more efficent ibm8514Imagewrite. + +Written by Hans Nasten ( nasten@everyware.se ) AUG 29, 1993. + + +The equivalent C-code looks like this. + +void +ibm8514RealImageFill(x, y, w, h, psrc, pwidth, pw, ph, pox, poy, alu, planemask) + int x; + int y; + int w; + int h; + unsigned char *psrc; + int pwidth; + int pw; + int ph; + int pox; + int poy; + short alu; + short planemask; +{ + int srcxsave, srcx, srcy, dstw, srcw, srch; + int wtemp, count, i, j; + unsigned short btemp, *p; + + + WaitQueue(2); + outw(FRGD_MIX, FSS_PCDATA | alu); + outw(WRT_MASK, planemask); + WaitQueue(7); + outw(CUR_X, (short)x); + outw(CUR_Y, (short)y); + if (w&1) + outw(MAJ_AXIS_PCNT, (short)w); + else + outw(MAJ_AXIS_PCNT, (short)w-1); + outw(MULTIFUNC_CNTL, MIN_AXIS_PCNT | h-1); + outw(MULTIFUNC_CNTL, SCISSORS_L | x); + outw(MULTIFUNC_CNTL, SCISSORS_R | (x+w-1)); + outw(CMD, CMD_RECT | INC_Y | INC_X | DRAW | + PCDATA | WRTDATA | _16BIT | BYTSEQ); + WaitQueue(8); + + modulus(y-poy,ph,srcy); + while( h > 0 ) { + srch = ( srcy+h > ph ? ph - srcy : h ); + modulus(x-pox,pw,srcxsave); + for( i = 0; i < srch; i++ ) { + dstw = w; + srcx = srcxsave; + srcw = ( srcx+w > pw ? pw - srcx : w ); + wtemp = 0; + while( dstw > 0 ) { + p = (unsigned short *)((unsigned char *)(psrc + pwidth * srcy + srcx)); + if( wtemp & 1 ) { + outw( PIX_TRANS, (btemp & 0x00ff) | (*p << 8 ) ); + p = (unsigned short *)((unsigned char *)(p)++); + wtemp = srcw - 1; + } + else + wtemp = srcw; + + count = wtemp / 2; + for( j = 0; j < count; j++ ) + outw( PIX_TRANS, *p++ ); + + dstw -= srcw; + srcx = 0; + if( wtemp & 1 ) { + if( dstw != 0 ) { + btemp = *p; + } + else + outw( PIX_TRANS, *p ); + } + srcw = ( dstw < pw ? dstw : pw ); + } + srcy++; + h--; + } + srcy = 0; + } + WaitQueue(3); + outw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC); + outw(MULTIFUNC_CNTL, SCISSORS_L); + outw(MULTIFUNC_CNTL, SCISSORS_R | 1023); +} + +******************************************************************************/ + +#define _8514_ASM_ +#include "assyntax.h" +#include "reg8514.h" + + AS_BEGIN + +/* + * Defines for in arguments. + */ +#define x_arg REGOFF(8,EBP) +#define y_arg REGOFF(12,EBP) +#define w_arg REGOFF(16,EBP) +#define h_arg REGOFF(20,EBP) +#define psrc_arg REGOFF(24,EBP) +#define pwidth_arg REGOFF(28,EBP) +#define pw_arg REGOFF(32,EBP) +#define ph_arg REGOFF(36,EBP) +#define pox_arg REGOFF(40,EBP) +#define poy_arg REGOFF(44,EBP) +#define alu_arg REGOFF(48,EBP) +#define planemask_arg REGOFF(52,EBP) + +/* + * Defines for local variables. + */ +#define srcy_loc REGOFF(-4,EBP) +#define srch_loc REGOFF(-8,EBP) +#define srcx_loc REGOFF(-12,EBP) +#define srcxsave_loc REGOFF(-16,EBP) + + + SEG_TEXT + ALIGNTEXT4 + +GLOBL GLNAME(ibm8514RealImageFill) + +GLNAME(ibm8514RealImageFill): + PUSH_L (EBP) + MOV_L (ESP,EBP) + SUB_L (CONST(16),ESP) + PUSH_L (EDI) + PUSH_L (ESI) + PUSH_L (EBX) +/* + * Check if height or width is 0. + */ + MOV_L (w_arg,EDI) + MOV_L (h_arg,EBX) + OR_L (EDI,EDI) + JZ (.finish) + OR_L (EBX,EBX) + JZ (.finish) +/* + * Wait for 2 queue entries + */ + MOV_L (GP_STAT,EDX) +.wait_queue_0: + IN_B + TEST_B (CONST(0x40),AL) + JNZ (.wait_queue_0) +/* + * Init 8514 registers. + */ + MOV_L (FRGD_MIX,EDX) + MOV_W (alu_arg,AX) + OR_W (FSS_PCDATA,AX) + OUT_W + + MOV_L (WRT_MASK,EDX) + MOV_W (planemask_arg,AX) + OUT_W +/* + * Wait for 7 queue entries + */ + MOV_L (GP_STAT,EDX) +.wait_queue_1: + IN_B + TEST_B (CONST(0x02),AL) + JNZ (.wait_queue_1) + + MOV_L (CUR_X,EDX) + MOV_W (x_arg,AX) + OUT_W + + MOV_L (CUR_Y,EDX) + MOV_W (y_arg,AX) + OUT_W +/* + * If the width is odd, program the 8514 registers for width+1. + */ + MOV_L (MAJ_AXIS_PCNT,EDX) + MOV_W (w_arg,AX) + TEST_W (CONST(1),AX) + JNZ (.odd) + + DEC_W (AX) +.odd: + OUT_W + +.cont1: +/* + * Set height and scissors registers. + * The scissors is used to clip the last unwanted pixel on + * lines with a odd length. + */ + MOV_L (MULTIFUNC_CNTL,EDX) + MOV_W (h_arg,AX) + DEC_W (AX) +/* OR_W (MIN_AXIS_PCNT,AX)*/ + OUT_W + MOV_W (x_arg,AX) + OR_W (SCISSORS_L,AX) + OUT_W + MOV_W (x_arg,AX) + ADD_W (w_arg,AX) + DEC_W (AX) + OR_W (SCISSORS_R,AX) + OUT_W +/* + * Give command to 8514. + * The command is : CMD_RECT | INC_Y | INC_X | DRAW + * | PCDATA | WRTDATA | _16BIT | BYTSEQ); + */ + MOV_L (CMD,EDX) + MOV_W (GP_WRITE_CMD,AX) + OUT_W +/* + * Do the modulo trick for the y coordinate. + * This is stolen from the gcc 2.4.5 output. + */ + MOV_L (y_arg,EAX) + SUB_L (poy_arg,EAX) + CDQ + IDIV_L (ph_arg) + TEST_L (EDX,EDX) + JGE (.mod1) + ADD_L (ph_arg,EDX) +.mod1: + MOV_L (EDX,srcy_loc) +/* + * Wait until the fifo is empty. + */ + MOV_L (GP_STAT,EDX) +.wait_queue_2: + IN_B + TEST_B (CONST(1),AL) + JNZ (.wait_queue_2) + + CLD +/* + * Process all lines on screen repeating the pixmap if needed. + * This loop paints from the present y location to the end of + * the pixmap. ( or to the end of the screen area if the pixmap + * is nonrepeating ). + * Start by checking if the pixmap is high enough to completely + * cover the screen area vertically. + */ +.next_pixmap_vertical: + MOV_L (srcy_loc,EAX) + ADD_L (h_arg,EAX) + MOV_L (ph_arg,EBX) + CMP_L (EBX,EAX) + JLE (.cmp_get_h) + + SUB_L (srcy_loc,EBX) + JMP (.cmp2) +.cmp_get_h: + MOV_L (h_arg,EBX) +.cmp2: + MOV_L (EBX,srch_loc) + SUB_L (EBX,h_arg) +/* + * Do the modulo trick for the x coordinate. + */ + MOV_L (x_arg,EAX) + SUB_L (pox_arg,EAX) + CDQ + IDIV_L (pw_arg) + TEST_L (EDX,EDX) + JGE (.mod2) + ADD_L (pw_arg,EDX) +.mod2: + MOV_L (EDX,srcxsave_loc) +/* + * Process one line of pixels in the pixmap, + * repeating the pixmap horisontally if needed. + * Start by checking if the pixmap is wide enough to completely + * cover the screen area horisontally. + */ +.next_pixmap_line: + MOV_L (w_arg,EBX) + MOV_L (srcxsave_loc,EAX) + ADD_L (EBX,EAX) + MOV_L (pw_arg,EDI) + CMP_L (EDI,EAX) + JLE (.cmp_get_w) + + SUB_L (srcxsave_loc,EDI) + JMP (.cmp4) + +.cmp_get_w: + MOV_L (EBX,EDI) +.cmp4: + SUB_L (ECX,ECX) +/* + * Calculate and save a pointer to the first pixel on this line in + * the pixmap. Add the x offset for the first lap and then skip the + * first part of the loop. + * ( all this just to avoid one multiplication and two add's in the loop ). + */ + MOV_L (pwidth_arg,EAX) + MUL_L (srcy_loc) + ADD_L (psrc_arg,EAX) + MOV_L (EAX,srcx_loc) + MOV_L (srcxsave_loc,ESI) + ADD_L (EAX,ESI) + JMP (.previously_even) +/* + * Copy one line of the pixmap to the screen. + * This loop paints one line of pixels from the pixmap onto the screen. + */ +.next_pixmap_horizontal: + MOV_L (srcx_loc,ESI) +/* + * Is there a pixel leftover from the previous lap in the loop ? + * ( since CH is used to store the saved pixel, only CL is in scope ). + */ + TEST_B (CONST(1),CL) + JZ (.previously_even) +/* + * Yes there was, combine it with the first pixel and write them + * to the 8514 engine. + */ + MOV_B (CH,AL) + MOV_B (REGIND(ESI),AH) + MOV_L (PIX_TRANS,EDX) + OUT_W + INC_L (ESI) + MOV_L (EDI,ECX) + DEC_L (ECX) + JMP (.move_pixels) + +.previously_even: + MOV_L (EDI,ECX) + MOV_L (PIX_TRANS,EDX) +/* + * Move the rest of the line using a "rep outsw" instruction. + */ +.move_pixels: + MOV_L (ECX,EAX) + SHR_L (CONST(1),ECX) + REP + OUTS_W + MOV_L (EAX,ECX) + SUB_L (EDI,EBX) +/* + * Is there a pixel left unwritten ? + */ + TEST_W (CONST(1),CX) + JZ (.all_written) +/* + * Yes, check if this is the last ( or only ) repetition of the pixmap. + */ + AND_L (EBX,EBX) + JZ (.write_pixel) +/* + * No, there is more to come. Save this pixel for later. + */ + MOV_B (REGIND(ESI),CH) + JMP (.all_written) +/* + * Yes, this is the last pixel displayed on this line. + * Write it and let the scissors cut the unwanted extra pixel. + */ +.write_pixel: + MOV_B (REGIND(ESI),AL) + OUT_W +/* + * Check if the next repetition of the pixmap is using the entire + * pixmap width. + */ +.all_written: + MOV_L (pw_arg,EAX) + CMP_L (EAX,EBX) + JG (.cmp6) + + MOV_L (EBX,EAX) +.cmp6: + MOV_L (EAX,EDI) + AND_L (EBX,EBX) + JNZ (.next_pixmap_horizontal) + + INC_L (srcy_loc) + DEC_L (srch_loc) + JNZ (.next_pixmap_line) + + SUB_L (EDX,EDX) + MOV_L (EDX,srcy_loc) + CMP_L (EDX,h_arg) + JNZ (.next_pixmap_vertical) + +/* + * Wait until room for 3 entries in the fifo. + */ + MOV_L (GP_STAT,EDX) +.wait_queue_3: + IN_B + TEST_B (CONST(0x20),AL) + JNZ (.wait_queue_3) +/* + * Reset FRGD_MIX to default. + */ + MOV_L (FRGD_MIX,EDX) + MOV_W (GP_DEF_FRGD_MIX,AX) + OUT_W + +/* + * Reset the scissors regsiters. + */ + MOV_L (MULTIFUNC_CNTL,EDX) + MOV_W (SCISSORS_L,AX) + OUT_W + MOV_W (SCISSORS_R,AX) + OR_W (CONST(1023),AX) + OUT_W + +.finish: + POP_L (EBX) + POP_L (ESI) + POP_L (EDI) + ADD_L (CONST(16),ESP) + POP_L (EBP) + RET |