From 5df79bd84656b57b729e7737e7fa3b4d1c61a4fc Mon Sep 17 00:00:00 2001 From: mdaenzer Date: Tue, 12 Sep 2000 19:17:11 +0000 Subject: merge trunk to ppc-1-0-0-branch --- xc/extras/Mesa/src/X86/3dnow.c | 2 +- xc/extras/Mesa/src/X86/3dnow_norm_raw.S | 418 +++++++++++++------------- xc/extras/Mesa/src/X86/katmai.c | 2 +- xc/extras/Mesa/src/X86/katmai_xform_masked1.S | 2 +- xc/extras/Mesa/src/X86/katmai_xform_masked2.S | 2 +- xc/extras/Mesa/src/X86/katmai_xform_masked3.S | 2 +- xc/extras/Mesa/src/X86/katmai_xform_masked4.S | 2 +- xc/extras/Mesa/src/X86/katmai_xform_raw1.S | 2 +- xc/extras/Mesa/src/X86/katmai_xform_raw2.S | 2 +- xc/extras/Mesa/src/X86/katmai_xform_raw3.S | 2 +- xc/extras/Mesa/src/X86/katmai_xform_raw4.S | 2 +- xc/extras/Mesa/src/X86/x86.c | 9 +- xc/extras/Mesa/src/X86/x86a.S | 2 +- 13 files changed, 224 insertions(+), 225 deletions(-) (limited to 'xc/extras/Mesa/src/X86') diff --git a/xc/extras/Mesa/src/X86/3dnow.c b/xc/extras/Mesa/src/X86/3dnow.c index 5255a8c7f..c252f70ea 100644 --- a/xc/extras/Mesa/src/X86/3dnow.c +++ b/xc/extras/Mesa/src/X86/3dnow.c @@ -49,7 +49,7 @@ #define XFORM_ARGS GLvector4f *to_vec, \ - const GLmatrix *mat, \ + const GLfloat m[16], \ const GLvector4f *from_vec, \ const GLubyte *mask, \ const GLubyte flag diff --git a/xc/extras/Mesa/src/X86/3dnow_norm_raw.S b/xc/extras/Mesa/src/X86/3dnow_norm_raw.S index 7d4eed85b..427ba295c 100644 --- a/xc/extras/Mesa/src/X86/3dnow_norm_raw.S +++ b/xc/extras/Mesa/src/X86/3dnow_norm_raw.S @@ -61,7 +61,7 @@ #define V3F_COUNT 8 #define V3F_STRIDE 12 -#define MAT_INV 64 +#define MAT_INV 4 #define M(i) REGOFF(i * 4, ECX) #define STRIDE REGOFF(12, ESI) @@ -72,28 +72,24 @@ GLOBL GLNAME(gl_3dnow_transform_normalize_normals_raw) GLNAME(gl_3dnow_transform_normalize_normals_raw): #define FRAME_OFFSET 12 - SUB_L ( CONST(4), ESP ) - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L ( ARG_LENGTHS, EDI ) - MOV_L ( ARG_IN, EAX ) - MOV_L ( ARG_DEST, EDX ) - MOV_L ( REGOFF(V3F_COUNT, EAX), EAX ) /* dest->count = in->count */ - MOV_L ( EAX, REGOFF(V3F_COUNT, EDX) ) - MOV_L ( ARG_IN, ESI ) - MOV_L ( ARG_MAT, ECX ) - MOV_L ( REGOFF(V3F_START, EDX), EAX ) /* dest->start */ - MOV_L ( REGOFF(MAT_INV, ECX), ECX ) /* mat->inv */ - MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ + PUSH_L ( EDI ) + PUSH_L ( ESI ) + PUSH_L ( EBP ) -ALIGNTEXT32 - - CMP_L ( CONST(0), REGOFF (V3F_COUNT, ESI ) ) /* count > 0 ?? */ + MOV_L ( ARG_LENGTHS, EDI ) + MOV_L ( ARG_IN, ESI ) + MOV_L ( ARG_DEST, EAX ) + MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */ + MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) ) + MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ + MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ + MOV_L ( ARG_MAT, ECX ) + MOV_L ( REGOFF(MAT_INV, ECX), ECX ) /* mat->inv */ + + CMP_L ( CONST(0), EBP ) /* count > 0 ?? */ JE ( LLBL (G3TN_end) ) - PUSH_L ( EBP ) MOV_L ( REGOFF (V3F_COUNT, ESI), EBP ) FEMMS @@ -101,7 +97,8 @@ ALIGNTEXT32 PUSH_L ( EAX ) PUSH_L ( EDX ) /* save counter & pointer for */ /* the normalize pass */ - + #undef FRAME_OFFSET + #define FRAME_OFFSET 24 MOVQ ( M(0), MM3 ) /* m1 | m0 */ MOVQ ( M(4), MM4 ) /* m5 | m4 */ @@ -115,8 +112,6 @@ ALIGNTEXT32 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */ JNE ( LLBL (G3TN_scale_end ) ) - #undef FRAME_OFFSET - #define FRAME_OFFSET 28 MOVD ( ARG_SCALE, MM0 ) /* | scale */ PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */ @@ -138,6 +133,8 @@ LLBL (G3TN_transform): PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */ ADD_L ( CONST(12), EAX ) /* next r */ + PREFETCHW ( REGIND(EAX) ) + PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */ PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */ @@ -156,6 +153,8 @@ LLBL (G3TN_transform): PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m*/ ADD_L ( STRIDE, EDX ) /* next normal */ + PREFETCH ( REGIND(EDX) ) + MOVD ( MM1, REGOFF(-4, EAX) ) /* write r2 */ MOVQ ( REGIND (EDX), MM0 ) /* x1 | x0 */ @@ -177,19 +176,24 @@ LLBL (G3TN_transform): ALIGNTEXT32 LLBL (G3TN_norm_w_lengths): + + PREFETCHW ( REGOFF(12,EAX) ) + MOVD ( REGIND (EDI), MM3 ) /* | length (x) */ PFMUL ( MM3, MM1 ) /* | x2 (normalize*/ PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */ PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalize*/ + ADD_L ( STRIDE, EDX ) /* next normal */ + ADD_L ( CONST(4), EDI ) /* next length */ + + PREFETCH ( REGIND(EDI) ) + MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */ MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */ - ADD_L ( STRIDE, EDX ) /* next normal */ ADD_L ( CONST(12), EAX ) /* next r */ - - ADD_L ( CONST(4), EDI ) /* next length */ DEC_L ( EBP ) /* decrement normal counter */ MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ @@ -199,6 +203,9 @@ LLBL (G3TN_norm_w_lengths): ALIGNTEXT32 LLBL (G3TN_norm): + + PREFETCHW ( REGIND(EAX) ) + MOVQ ( MM0, MM3 ) /* x1 | x0 */ MOVQ ( MM1, MM4 ) /* | x2 */ @@ -233,46 +240,39 @@ LLBL (G3TN_norm): LLBL (G3TN_exit_3dnow): FEMMS - POP_L ( EBP ) LLBL (G3TN_end): + POP_L ( EBP ) POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ECX ) RET ALIGNTEXT16 - GLOBL GLNAME(gl_3dnow_transform_normalize_normals_no_rot_raw) GLNAME(gl_3dnow_transform_normalize_normals_no_rot_raw): #undef FRAME_OFFSET #define FRAME_OFFSET 12 - SUB_L ( CONST(4), ESP ) - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L ( ARG_LENGTHS, EDI ) - MOV_L ( ARG_IN, EAX ) - MOV_L ( ARG_DEST, EDX ) - MOV_L ( REGOFF(V3F_COUNT, EAX), EAX ) /* dest->count = in->count */ - MOV_L ( EAX, REGOFF(V3F_COUNT, EDX) ) - MOV_L ( ARG_IN, ESI ) - MOV_L ( ARG_MAT, ECX ) - MOV_L ( REGOFF(V3F_START, EDX), EAX ) /* dest->start */ - MOV_L ( REGOFF(MAT_INV, ECX), ECX ) /* mat->inv */ - MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ + PUSH_L ( EDI ) + PUSH_L ( ESI ) + PUSH_L ( EBP ) -ALIGNTEXT32 - - CMP_L ( CONST(0), REGOFF (V3F_COUNT, ESI ) ) /* count > 0 ?? */ + MOV_L ( ARG_LENGTHS, EDI ) + MOV_L ( ARG_IN, ESI ) + MOV_L ( ARG_DEST, EAX ) + MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */ + MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) ) + MOV_L ( ARG_MAT, ECX ) + MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ + MOV_L ( REGOFF(MAT_INV, ECX), ECX ) /* mat->inv */ + MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ + + CMP_L ( CONST(0), EBP ) /* count > 0 ?? */ JE ( LLBL (G3TNNR_end) ) - PUSH_L ( EBP ) - MOV_L ( REGOFF (V3F_COUNT, ESI), EBP ) FEMMS MOVD ( M(0), MM0 ) /* | m0 */ @@ -284,14 +284,13 @@ ALIGNTEXT32 CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */ JNE ( LLBL (G3TNNR_scale_end ) ) - #undef FRAME_OFFSET - #define FRAME_OFFSET 16 MOVD ( ARG_SCALE, MM7 ) /* | scale */ PUNPCKLDQ ( MM7, MM7 ) /* scale | scale */ PFMUL ( MM7, MM0 ) /* scale * m5 | scale * m0 */ PFMUL ( MM7, MM2 ) /* scale * m10 | scale * m10 */ +ALIGNTEXT32 LLBL (G3TNNR_scale_end): MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */ MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */ @@ -302,10 +301,16 @@ LLBL (G3TNNR_scale_end): MOVD ( REGIND(EDI), MM3 ) /* | length (x) */ +ALIGNTEXT32 LLBL (G3TNNR_norm_w_lengths): /* use precalculated lengths */ + + PREFETCHW ( REGIND(EAX) ) + PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */ ADD_L ( STRIDE, EDX ) /* next normal */ + PREFETCH ( REGIND(EDX) ) + PFMUL ( MM2, MM7 ) /* | x2*m10 */ ADD_L ( CONST(12), EAX ) /* next r */ @@ -327,7 +332,11 @@ LLBL (G3TNNR_norm_w_lengths): /* use precalculated lengths */ JA ( LLBL (G3TNNR_norm_w_lengths) ) JMP ( LLBL (G3TNNR_exit_3dnow) ) +ALIGNTEXT32 LLBL (G3TNNR_norm): /* need to calculate lengths */ + + PREFETCHW ( REGIND(EAX) ) + PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */ ADD_L ( CONST(12), EAX ) /* next r */ @@ -337,12 +346,15 @@ LLBL (G3TNNR_norm): /* need to calculate lengths */ MOVQ ( MM7, MM4 ) /* | x2 (transformed) */ PFMUL ( MM6, MM3 ) /* x1*x1 | x0*x0 */ + PFMUL ( MM7, MM4 ) /* | x2*x2 */ PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1 */ PFADD ( MM4, MM3 ) /* | x0*x0+x1*x1+x2*x2*/ ADD_L ( STRIDE, EDX ) /* next normal */ + PREFETCH ( REGIND(EDX) ) + PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */ MOVQ ( MM5, MM4 ) @@ -367,12 +379,11 @@ LLBL (G3TNNR_norm): /* need to calculate lengths */ LLBL (G3TNNR_exit_3dnow): FEMMS - POP_L ( EBP ) LLBL (G3TNNR_end): + POP_L ( EBP ) POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ECX ) RET @@ -381,38 +392,31 @@ LLBL (G3TNNR_end): ALIGNTEXT16 - GLOBL GLNAME(gl_3dnow_transform_rescale_normals_no_rot_raw) GLNAME(gl_3dnow_transform_rescale_normals_no_rot_raw): #undef FRAME_OFFSET #define FRAME_OFFSET 12 - SUB_L ( CONST(4), ESP ) - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L ( ARG_IN, EAX ) - MOV_L ( ARG_DEST, EDX ) - MOV_L ( REGOFF(V3F_COUNT, EAX), EAX ) /* dest->count = in->count */ - MOV_L ( EAX, REGOFF(V3F_COUNT, EDX) ) - MOV_L ( ARG_IN, ESI ) - MOV_L ( ARG_MAT, ECX ) - MOV_L ( REGOFF(V3F_START, EDX), EAX ) /* dest->start */ - MOV_L ( REGOFF(MAT_INV, ECX), ECX ) /* mat->inv */ - MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ + PUSH_L ( EDI ) + PUSH_L ( ESI ) + PUSH_L ( EBP ) -ALIGNTEXT32 - - CMP_L ( CONST(0), REGOFF (V3F_COUNT, ESI) ) + MOV_L ( ARG_IN, EAX ) + MOV_L ( ARG_DEST, EDX ) + MOV_L ( REGOFF(V3F_COUNT, EAX), EBP ) /* dest->count = in->count */ + MOV_L ( EBP, REGOFF(V3F_COUNT, EDX) ) + MOV_L ( ARG_IN, ESI ) + MOV_L ( ARG_MAT, ECX ) + MOV_L ( REGOFF(MAT_INV, ECX), ECX ) /* mat->inv */ + MOV_L ( REGOFF(V3F_START, EDX), EAX ) /* dest->start */ + MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ + + CMP_L ( CONST(0), EBP ) JE ( LLBL (G3TRNR_end) ) - PUSH_L ( EBP ) - MOV_L ( REGOFF (V3F_COUNT, ESI), EBP ) FEMMS - #undef FRAME_OFFSET - #define FRAME_OFFSET 16 MOVD ( ARG_SCALE, MM6 ) /* | scale */ PUNPCKLDQ ( MM6, MM6 ) /* scale | scale */ @@ -428,11 +432,15 @@ ALIGNTEXT32 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */ ALIGNTEXT32 - LLBL (G3TRNR_rescale): + + PREFETCHW ( REGIND(EAX) ) + PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */ ADD_L ( STRIDE, EDX ) /* next normal */ + PREFETCH ( REGIND(EDX) ) + PFMUL ( MM2, MM5 ) /* | x2*m10 */ ADD_L ( CONST(12), EAX ) /* next r */ @@ -446,12 +454,11 @@ LLBL (G3TRNR_rescale): JA ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal */ FEMMS - POP_L ( EBP ) LLBL (G3TRNR_end): + POP_L ( EBP ) POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ECX ) RET @@ -463,24 +470,19 @@ GLOBL GLNAME(gl_3dnow_transform_rescale_normals_raw) GLNAME(gl_3dnow_transform_rescale_normals_raw): #undef FRAME_OFFSET - #define FRAME_OFFSET 12 - SUB_L ( CONST(4), ESP ) - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L (REGOFF(24,ESP), EAX) - MOV_L (REGOFF(36,ESP), EDX) - MOV_L (REGOFF(8,EAX), EAX) - MOV_L (EAX, REGOFF(8,EDX)) - MOV_L (REGOFF(24,ESP), EDI) - MOV_L (REGOFF(4,EDX), EAX) - MOV_L (REGOFF(16,ESP), ECX) - MOV_L (REGOFF(24,ESP), ESI) - MOV_L (REGOFF(64,ECX), ECX) - MOV_L (REGOFF(4,EDI), EDX) - MOV_L (REGOFF(8,EDI), EDI) + #define FRAME_OFFSET 8 -ALIGNTEXT32 + PUSH_L ( EDI ) + PUSH_L ( ESI ) + + MOV_L ( ARG_IN, ESI ) + MOV_L ( ARG_DEST, EAX ) + MOV_L ( ARG_MAT, ECX ) + MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */ + MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) ) + MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ + MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ + MOV_L ( REGOFF(MAT_INV, ECX), ECX ) /* mat->inv */ CMP_L ( CONST(0), EDI ) JE ( LLBL (G3TR_end) ) @@ -490,12 +492,12 @@ ALIGNTEXT32 MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */ MOVQ ( REGOFF(16,ECX), MM4 ) /* m5 | m4 */ - MOVD ( REGOFF(20, ESP), MM0 ) /* scale */ + MOVD ( ARG_SCALE, MM0 ) /* scale */ MOVD ( REGOFF(8,ECX), MM5 ) /* | m2 */ PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */ - PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) + PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) PFMUL ( MM0, MM3 ) /* scale*m1 | scale*m0 */ MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8*/ @@ -510,7 +512,11 @@ ALIGNTEXT32 PFMUL ( MM0, MM7 ) /* | scale*m10 */ MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */ +ALIGNTEXT32 LLBL (G3TR_rescale): + + PREFETCHW ( REGIND(EAX) ) + MOVQ ( MM0, MM1 ) /* x1 | x0 */ PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */ @@ -520,25 +526,28 @@ LLBL (G3TR_rescale): PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */ PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */ + MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */ + PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */ PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */ - MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */ - MOVQ ( MM0, REGOFF(-12, EAX) ) /* write r0, r1 */ + MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */ + ADD_L ( STRIDE, EDX ) /* next normal */ + + PREFETCH ( REGIND(EDX) ) + MOVQ ( MM0, REGOFF(-12, EAX) ) /* write r0, r1 */ PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */ - MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */ PFMUL ( MM7, MM2 ) /* | x2*m10 */ PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */ PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */ - ADD_L ( REGOFF(12, ESI), EDX ) /* next normal */ - MOVD ( MM1, REGOFF(-4, EAX) ) /* write r2 */ - MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */ + MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */ MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */ + DEC_L ( EDI ) /* decrement normal counter */ JA ( LLBL (G3TR_rescale) ) @@ -547,7 +556,6 @@ LLBL (G3TR_rescale): LLBL (G3TR_end): POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ECX ) RET @@ -560,25 +568,20 @@ ALIGNTEXT16 GLOBL GLNAME(gl_3dnow_transform_normals_no_rot_raw) GLNAME(gl_3dnow_transform_normals_no_rot_raw): - #define FRAME_OFFSET 12 - SUB_L ( CONST(4), ESP ) - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L (REGOFF(24,ESP), EAX) - MOV_L (REGOFF(36,ESP), EDX) - MOV_L (REGOFF(8,EAX), EAX) - MOV_L (EAX, REGOFF(8,EDX)) - MOV_L (REGOFF(24,ESP), EDI) - MOV_L (REGOFF(4,EDX), EAX) - MOV_L (REGOFF(16,ESP), ECX) - MOV_L (REGOFF(24,ESP), ESI) - MOV_L (REGOFF(64,ECX), ECX) - MOV_L (REGOFF(4,EDI), EDX) - MOV_L (REGOFF(8,EDI), EDI) - - -ALIGNTEXT32 + #undef FRAME_OFFSET + #define FRAME_OFFSET 8 + + PUSH_L ( EDI ) + PUSH_L ( ESI ) + + MOV_L ( ARG_IN, ESI ) + MOV_L ( ARG_DEST, EAX ) + MOV_L ( ARG_MAT, ECX ) + MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */ + MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) ) + MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ + MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ + MOV_L ( REGOFF(MAT_INV, ECX), ECX ) /* mat->inv */ CMP_L ( CONST(0), EDI ) JE ( LLBL (G3TNR_end) ) @@ -595,10 +598,14 @@ ALIGNTEXT32 MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */ ALIGNTEXT32 - LLBL (G3TNR_transform): + + PREFETCHW ( REGIND(EAX) ) + PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */ - ADD_L ( REGOFF(12, ESI), EDX) /* next normal */ + ADD_L ( STRIDE, EDX) /* next normal */ + + PREFETCH ( REGIND(EDX) ) PFMUL ( MM2, MM5 ) /* | x2*m10 */ ADD_L ( CONST(12), EAX ) /* next r */ @@ -617,7 +624,6 @@ LLBL (G3TNR_transform): LLBL (G3TNR_end): POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ECX ) RET @@ -631,24 +637,20 @@ ALIGNTEXT16 GLOBL GLNAME(gl_3dnow_transform_normals_raw) GLNAME(gl_3dnow_transform_normals_raw): - #define FRAME_OFFSET 12 - SUB_L ( CONST(4), ESP ) - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L (REGOFF(24,ESP), EAX) - MOV_L (REGOFF(36,ESP), EDX) - MOV_L (REGOFF(8,EAX), EAX) - MOV_L (EAX, REGOFF(8,EDX)) - MOV_L (REGOFF(24,ESP), EDI) - MOV_L (REGOFF(4,EDX), EAX) - MOV_L (REGOFF(16,ESP), ECX) - MOV_L (REGOFF(24,ESP), ESI) - MOV_L (REGOFF(64,ECX), ECX) - MOV_L (REGOFF(4,EDI), EDX) - MOV_L (REGOFF(8,EDI), EDI) - -ALIGNTEXT32 + #undef FRAME_OFFSET + #define FRAME_OFFSET 8 + + PUSH_L ( EDI ) + PUSH_L ( ESI ) + + MOV_L ( ARG_IN, ESI ) + MOV_L ( ARG_DEST, EAX ) + MOV_L ( ARG_MAT, ECX ) + MOV_L ( REGOFF(V3F_COUNT, ESI), EDI ) /* dest->count = in->count */ + MOV_L ( EDI, REGOFF(V3F_COUNT, EAX) ) + MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ + MOV_L ( REGOFF(V3F_START, ESI), EDX ) /* in->start */ + MOV_L ( REGOFF(MAT_INV, ECX), ECX ) /* mat->inv */ CMP_L ( CONST(0), EDI ) /* count > 0 ?? */ JE ( LLBL (G3T_end) ) @@ -659,15 +661,19 @@ ALIGNTEXT32 MOVQ ( REGOFF(16, ECX), MM4 ) /* m5 | m4 */ MOVD ( REGOFF(8, ECX), MM5 ) /* | m2 */ - PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2*/ + PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */ - MOVQ ( REGOFF(32,ECX), MM6 ) /* m9 | m8 */ - MOVD ( REGOFF(40,ECX), MM7 ) /* | m10 */ + MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8 */ + MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */ MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */ MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */ +ALIGNTEXT32 LLBL (G3T_transform): + + PREFETCHW ( REGIND(EAX) ) + MOVQ ( MM0, MM1 ) /* x1 | x0 */ PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */ @@ -687,10 +693,12 @@ LLBL (G3T_transform): MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */ PFMUL ( MM7, MM2 ) /* | x2*m10 */ - PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */ + ADD_L ( STRIDE, EDX ) /* next normal */ + PREFETCH ( REGIND(EDX) ) + + PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */ PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */ - ADD_L ( REGOFF(12, ESI), EDX ) /* next normal */ MOVD ( MM1, REGOFF(-4, EAX) ) /* write r2 */ MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */ @@ -704,7 +712,6 @@ LLBL (G3T_transform): LLBL (G3T_end): POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ECX ) RET @@ -716,30 +723,23 @@ ALIGNTEXT16 GLOBL GLNAME(gl_3dnow_normalize_normals_raw) GLNAME(gl_3dnow_normalize_normals_raw): + #undef FRAME_OFFSET #define FRAME_OFFSET 12 - SUB_L ( CONST(4), ESP ) - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L (REGOFF(16,ESP), ESI) - MOV_L (REGOFF(24,ESP), EAX) - MOV_L (REGOFF(36,ESP), EDX) - MOV_L (REGOFF(8,EAX), EAX) - MOV_L (EAX, REGOFF(8,EDX)) - MOV_L (REGOFF(4,EDX), EAX) - MOV_L (REGOFF(24,ESP), ECX) - MOV_L (REGOFF(28,ESP), EDX) - MOV_L (REGOFF(24,ESP), EDI) - MOV_L (REGOFF(4,ECX), ECX) - - -ALIGNTEXT32 - - CMP_L ( CONST(0), REGOFF(8, EDI) ) /* count > 0 ?? */ - JE ( LLBL (G3N_end) ) + PUSH_L ( EDI ) + PUSH_L ( ESI ) PUSH_L ( EBP ) - MOV_L (REGOFF(8,EDI), EBP) + + MOV_L ( ARG_IN, ESI ) + MOV_L ( ARG_DEST, EAX ) + MOV_L ( REGOFF(V3F_COUNT, ESI), EBP ) /* dest->count = in->count */ + MOV_L ( EBP, REGOFF(V3F_COUNT, EAX) ) + MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ + MOV_L ( REGOFF(V3F_START, ESI), ECX ) /* in->start */ + MOV_L ( ARG_LENGTHS, EDX ) + + CMP_L ( CONST(0), EBP ) /* count > 0 ?? */ + JE ( LLBL (G3N_end) ) FEMMS @@ -749,17 +749,23 @@ ALIGNTEXT32 CMP_L ( CONST(0), EDX ) /* lengths == 0 ? */ JE ( LLBL (G3N_norm2) ) /* calculate lengths */ +ALIGNTEXT32 LLBL (G3N_norm1): /* use precalculated lengths */ + + PREFETCH ( REGIND(EAX) ) + MOVD ( REGIND(EDX), MM3 ) /* | length (x) */ PFMUL ( MM3, MM1 ) /* | x2 (normalized) */ PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */ - PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */ + ADD_L ( STRIDE, ECX ) /* next normal */ + + PREFETCH ( REGIND(ECX) ) + PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */ MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */ - MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */ - ADD_L ( REGOFF(12, EDI), ECX ) /* next normal */ + MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */ ADD_L ( CONST(12), EAX ) /* next r */ ADD_L ( CONST(4), EDX ) /* next length */ @@ -771,10 +777,15 @@ LLBL (G3N_norm1): /* use precalculated lengths */ JMP ( LLBL (G3N_end1) ) -LLBL (G3N_norm2): - /* need to calculate lengths */ +ALIGNTEXT32 +LLBL (G3N_norm2): /* need to calculate lengths */ + + PREFETCHW ( REGIND(EAX) ) + MOVQ ( MM0, MM3 ) /* x1 | x0 */ - ADD_L ( REGOFF(12, EDI), ECX ) /* next normal */ + ADD_L ( STRIDE, ECX ) /* next normal */ + + PREFETCH ( REGIND(ECX) ) PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */ MOVQ ( MM1, MM4 ) /* | x2 */ @@ -808,12 +819,11 @@ LLBL (G3N_norm2): LLBL (G3N_end1): FEMMS - POP_L ( EBP ) LLBL (G3N_end): - POP_L ( ESI ) - POP_L ( EDI ) - POP_L ( ECX ) + POP_L ( EBP ) + POP_L ( ESI ) + POP_L ( EDI ) RET @@ -825,48 +835,46 @@ ALIGNTEXT16 GLOBL GLNAME(gl_3dnow_rescale_normals_raw) GLNAME(gl_3dnow_rescale_normals_raw): - #define FRAME_OFFSET 12 - SUB_L ( CONST(4), ESP ) - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L (REGOFF(20,ESP), ESI) - MOV_L (REGOFF(24,ESP), EAX) - MOV_L (REGOFF(36,ESP), EDX) - MOV_L (REGOFF(8,EAX), EAX) - MOV_L (EAX, REGOFF(8,EDX)) - MOV_L (REGOFF(24,ESP), EDI) - MOV_L (EDI, ECX) - MOV_L (REGOFF(4,EDX), EAX) - MOV_L (REGOFF(4,ECX), ECX) - MOV_L (REGOFF(8,EDI), EDX) + #undef FRAME_OFFSET + #define FRAME_OFFSET 8 + PUSH_L ( EDI ) + PUSH_L ( ESI ) -ALIGNTEXT32 + MOV_L ( ARG_IN, ESI ) + MOV_L ( ARG_DEST, EAX ) + MOV_L ( REGOFF(V3F_COUNT, ESI), EDX ) /* dest->count = in->count */ + MOV_L ( EDX, REGOFF(V3F_COUNT, EAX) ) + MOV_L ( REGOFF(V3F_START, EAX), EAX ) /* dest->start */ + MOV_L ( REGOFF(V3F_START, ESI), ECX ) /* in->start */ CMP_L ( CONST(0), EDX ) JE ( LLBL (G3R_end) ) FEMMS - MOVD ( ESI, MM0 ) /* scale */ + MOVD ( ARG_SCALE, MM0 ) /* scale */ PUNPCKLDQ ( MM0, MM0 ) MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */ MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */ ALIGNTEXT32 - LLBL (G3R_rescale): + + PREFETCHW ( REGIND(EAX) ) + PFMUL ( MM0, MM1 ) /* x1*scale | x0*scale */ - ADD_L ( REGOFF(12, EDI), ECX ) /* next normal */ + ADD_L ( STRIDE, ECX ) /* next normal */ + + PREFETCH ( REGIND(ECX) ) PFMUL ( MM0, MM2 ) /* | x2*scale */ ADD_L ( CONST(12), EAX ) /* next r */ - DEC_L ( EDX ) /* decrement normal counter */ MOVQ ( MM1, REGOFF(-12, EAX) ) /* write r0, r1 */ - MOVD ( MM2, REGOFF(-4, EAX) ) /* write r2 */ + + DEC_L ( EDX ) /* decrement normal counter */ MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */ MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */ @@ -875,17 +883,9 @@ LLBL (G3R_rescale): FEMMS LLBL (G3R_end): - POP_L ( ESI ) - POP_L ( EDI ) - POP_L ( ECX ) + POP_L ( ESI ) + POP_L ( EDI ) RET - - - - - - - diff --git a/xc/extras/Mesa/src/X86/katmai.c b/xc/extras/Mesa/src/X86/katmai.c index 1dfa0f7fc..d27f89b2d 100644 --- a/xc/extras/Mesa/src/X86/katmai.c +++ b/xc/extras/Mesa/src/X86/katmai.c @@ -49,7 +49,7 @@ #define XFORM_ARGS GLvector4f *to_vec, \ - const GLmatrix *mat, \ + const GLfloat m[16], \ const GLvector4f *from_vec, \ const GLubyte *mask, \ const GLubyte flag diff --git a/xc/extras/Mesa/src/X86/katmai_xform_masked1.S b/xc/extras/Mesa/src/X86/katmai_xform_masked1.S index f3e11c803..235ac4e57 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_masked1.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_masked1.S @@ -32,7 +32,7 @@ * Offsets for transform_func arguments * * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, + * const GLfloat m[16], * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); diff --git a/xc/extras/Mesa/src/X86/katmai_xform_masked2.S b/xc/extras/Mesa/src/X86/katmai_xform_masked2.S index a4936fa6a..45193f19c 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_masked2.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_masked2.S @@ -32,7 +32,7 @@ * Offsets for transform_func arguments * * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, + * const GLfloat m[16], * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); diff --git a/xc/extras/Mesa/src/X86/katmai_xform_masked3.S b/xc/extras/Mesa/src/X86/katmai_xform_masked3.S index 10f3c41b5..3258f521e 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_masked3.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_masked3.S @@ -32,7 +32,7 @@ * Offsets for transform_func arguments * * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, + * const GLfloat m[16], * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); diff --git a/xc/extras/Mesa/src/X86/katmai_xform_masked4.S b/xc/extras/Mesa/src/X86/katmai_xform_masked4.S index 4a0bf5c05..82d960c02 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_masked4.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_masked4.S @@ -32,7 +32,7 @@ * Offsets for transform_func arguments * * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, + * const GLfloat m[16], * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); diff --git a/xc/extras/Mesa/src/X86/katmai_xform_raw1.S b/xc/extras/Mesa/src/X86/katmai_xform_raw1.S index 4ca992d2c..e6ff089e2 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_raw1.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_raw1.S @@ -32,7 +32,7 @@ * Offsets for transform_func arguments * * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, + * const GLfloat m[16], * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); diff --git a/xc/extras/Mesa/src/X86/katmai_xform_raw2.S b/xc/extras/Mesa/src/X86/katmai_xform_raw2.S index 61845034c..993880885 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_raw2.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_raw2.S @@ -32,7 +32,7 @@ * Offsets for transform_func arguments * * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, + * const GLfloat m[16], * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); diff --git a/xc/extras/Mesa/src/X86/katmai_xform_raw3.S b/xc/extras/Mesa/src/X86/katmai_xform_raw3.S index 49918de95..35f95f511 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_raw3.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_raw3.S @@ -32,7 +32,7 @@ * Offsets for transform_func arguments * * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, + * const GLfloat m[16], * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); diff --git a/xc/extras/Mesa/src/X86/katmai_xform_raw4.S b/xc/extras/Mesa/src/X86/katmai_xform_raw4.S index d0867da7c..5a38a2f4e 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_raw4.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_raw4.S @@ -32,7 +32,7 @@ * Offsets for transform_func arguments * * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, + * const GLfloat m[16], * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); diff --git a/xc/extras/Mesa/src/X86/x86.c b/xc/extras/Mesa/src/X86/x86.c index 1b24811d4..606ec7051 100644 --- a/xc/extras/Mesa/src/X86/x86.c +++ b/xc/extras/Mesa/src/X86/x86.c @@ -36,23 +36,23 @@ #include "x86.h" #ifdef USE_X86_ASM -extern void _ASMAPI gl_v16_x86_cliptest_points4(GLfloat *first_vert, +extern void _ASMAPI gl_v16_x86_cliptest_points4( GLfloat *first_vert, GLfloat *last_vert, GLubyte *or_mask, GLubyte *and_mask, GLubyte *clip_mask ); -extern void _ASMAPI gl_v16_x86_general_xform(GLfloat *dest, +extern void _ASMAPI gl_v16_x86_general_xform( GLfloat *dest, const GLfloat *m, const GLfloat *src, GLuint src_stride, - GLuint count); + GLuint count ); #endif #define XFORM_ARGS GLvector4f *to_vec, \ - const GLmatrix *mat, \ + const GLfloat m[16], \ const GLvector4f *from_vec, \ const GLubyte *mask, \ const GLubyte flag @@ -117,7 +117,6 @@ void gl_init_x86_asm_transforms( void ) gl_test_all_transform_functions("x86"); #endif - gl_cliptest_points4_v16 = gl_v16_x86_cliptest_points4; gl_xform_points3_v16_general = gl_v16_x86_general_xform; #endif diff --git a/xc/extras/Mesa/src/X86/x86a.S b/xc/extras/Mesa/src/X86/x86a.S index 621bc1f1a..bae1e8f65 100644 --- a/xc/extras/Mesa/src/X86/x86a.S +++ b/xc/extras/Mesa/src/X86/x86a.S @@ -28,7 +28,7 @@ * Offsets for transform_func arguments * * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, + * const GLfloat m[16], * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); -- cgit v1.2.3