diff options
author | Keith Packard <keithp@keithp.com> | 2008-03-31 02:20:43 -0700 |
---|---|---|
committer | Keith Packard <keithp@keithp.com> | 2008-03-31 02:20:43 -0700 |
commit | 08500507284f13ad7084eb231b43e117e9728129 (patch) | |
tree | 2150b2ec6731be012091c68a98026452dd5311c3 /src/exa_sf_mask.g4a | |
parent | 949d73271d7100c1f028fd60f185f4929461304e (diff) |
Use m4 to clean up gen4 asm progs. Start adding projective transform support.
Use macros for register names, modularize functions into separate files.
Diffstat (limited to 'src/exa_sf_mask.g4a')
-rw-r--r-- | src/exa_sf_mask.g4a | 104 |
1 files changed, 37 insertions, 67 deletions
diff --git a/src/exa_sf_mask.g4a b/src/exa_sf_mask.g4a index c830fd86..a0d6efc4 100644 --- a/src/exa_sf_mask.g4a +++ b/src/exa_sf_mask.g4a @@ -21,82 +21,52 @@ * IN THE SOFTWARE. * * Authors: - * Keith Packard <keithp@keithp.com> - * Eric Anholt <eric@anholt.net> * Wang Zhenyu <zhenyu.z.wang@intel.com> */ +/* FIXME how to set up second coefficient for mask tex coord */ -/* - * Inputs (note all sub-register addresses are bytes, not float indices) - * - * Note that the vertices will have been reordered: - * - * V0 is topmost (leftmost among topmost) (upper left) - * V1 is next clockwise (lower right) - * V2 is remaining (lower left) - * - * V0 ...................... XX - * | . - * | . - * | . - * V2------------------------V1 - * - * G0 thread state -- just pass along - * - * G1 and G2 are fixed by SF spec - * - * G1.0 reserved - * G1.4 Provoking vertex - * G1.8 Determinant - * G1.12 X1 - X0 - * G1.16 X2 - X0 - * G1.20 Y1 - Y0 - * G1.24 Y2 - Y0 - * G1.30 reserved - * - * G2.0 Z0 - * G2.4 1/W0 - * G2.8 Z1 - * G2.12 1/W1 - * G2.16 Z2 - * G2.20 1/W2 - * G2.24 reserved - * G2.30 reserved - * - * G3 is V0 Vertex Attribute Data from URB (upper left) - * - * G3.0 u0 - * G3.4 v0 - * - * G4 is V1 Vertex Attribute Data from URB (lower right) - * - * G4.0 u1 - * G4.4 v1 - * - * G5 is V2 Vertex Attribute Data from URB (lower left) - * +/* + g3 (v0) { u0, v0, 1.0, 1.0 } ==> {u0, v0, 1.0, 1.0, mu0, mv0, 1.0, 1.0} Co[0](u0) Co[1](v0) Co[2](mu0) Co[3](mv0) + g4 (v1) { u1, v1, 1.0, 1.0 } ==> {u1, v1, 1.0, 1.0, mu1, mv1, 1.0, 1.0} + g5 (v2) { u2, v2 } ==> {u2, v2, mu2, mv2} + g6 { 1/(x1-x0), 1/(y1-y0) } + g7 { u1-u0, v1-v0, 0, 0} ==>{u1-u0, v1-v0,0, 0, mu1-mu0, mv1-mv0, 0, 0} + -> { (u1-u0)/(x1-x0), (v1-v0)/(y1-y0) } ==>{(u1-u0)/(x1-x0), (v1-v0)/(y1-y0),(mu1-mu0)/(x1-x0), (mv1-mv0)/(y1-y0) } + Cx, Cy Cx[0], Cy[0], Cx[1], Cy[1] */ -/* Compute inverses of the input deltas */ -send (4) 0 g6<1>F g1.12<4,4,1>F math inv mlen 1 rlen 1 { align1 }; +/* assign Cx[0], Cx[1] to src, same to Cy, Co + 
Cx[2], Cx[3] to mask, same to Cy, Co */ -/* texture location at V0 */ -mov (8) m3<1>F g3<8,8,1>F { align1 }; +send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 }; +/* Cx[0] */ +mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 }; +/* Cy[0] */ +mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 }; +/* Cx[2] */ +mul (1) g7.8<1>F g7.8<0,1,0>F g6<0,1,0>F { align1 }; +/* Cy[2] */ +mul (1) g7.12<1>F g7.12<0,1,0>F g6.4<0,1,0>F { align1 }; -/* compute V1 - V2 (motion in X) for texture coordinates */ -add (8) g7<1>F g4<8,8,1>F -g5<8,8,1>F { align1 }; - -/* multiply by 1/dx */ -mul (8) m1<1>F g7<8,8,1>F g6.0<0,1,0>F { align1 }; - -/* Compute V2 - V0 (motion in Y) for texture coordinates */ -add (8) g7<1>F g5<8,8,1>F -g3<8,8,1>F { align1 }; - -/* multiply by 1/dy */ -mul (8) m2<1>F g7<8,8,1>F g6.8<0,1,0>F {align1 }; +/* src Cx[0], Cx[1] */ +mov (8) m1<1>F g7<0,1,0>F { align1 }; +/* mask Cx[2], Cx[3] */ +mov (1) m1.8<1>F g7.8<0,1,0>F { align1 }; +mov (1) m1.12<1>F g7.8<0,1,0>F { align1 }; +/* src Cy[0], Cy[1] */ +mov (8) m2<1>F g7.4<0,1,0>F { align1 }; +/* mask Cy[2], Cy[3] */ +mov (1) m2.8<1>F g7.12<0,1,0>F { align1 }; +mov (1) m2.12<1>F g7.12<0,1,0>F { align1 }; +/* src Co[0], Co[1] */ +mov (8) m3<1>F g3<8,8,1>F { align1 }; +/* mask Co[2], Co[3] */ +mov (1) m3.8<1>F g3.8<0,1,0>F { align1 }; +mov (1) m3.12<1>F g3.12<0,1,0>F { align1 }; -/* and we're done */ send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT }; nop; nop; |