diff options
author | Alex Deucher <alexdeucher@gmail.com> | 2009-04-16 20:33:28 -0400 |
---|---|---|
committer | Alex Deucher <alexdeucher@gmail.com> | 2009-04-17 10:25:00 -0400 |
commit | 00266177bed2dc8693df497ca3ec19f2dc4adc05 (patch) | |
tree | 65374e696fa6699dfcf68eb4204146088affe46c /src/radeon_textured_videofuncs.c | |
parent | 85323a7f84381fef7fad20c7f7ec601637af9aa7 (diff) |
R3xx/R5xx: only apply Xv attributes if bicubic is disabled
Provides consistent output
Diffstat (limited to 'src/radeon_textured_videofuncs.c')
-rw-r--r-- | src/radeon_textured_videofuncs.c | 3561 |
1 files changed, 1778 insertions, 1783 deletions
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c index 0ac247ad..caf8dce0 100644 --- a/src/radeon_textured_videofuncs.c +++ b/src/radeon_textured_videofuncs.c @@ -1051,9 +1051,9 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) colorpitch |= R300_COLORTILE; - if (!pPriv->bicubic_enabled && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) { + if (((pPriv->bicubic_state == BICUBIC_OFF)) && + (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) isplanar = TRUE; - } if (isplanar) { txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0; @@ -1064,7 +1064,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) else txformat1 = R300_TX_FORMAT_VYUY422; - if (pPriv->bicubic_enabled) + if (pPriv->bicubic_state != BICUBIC_OFF) txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; /* pitch is in pixels */ @@ -1250,661 +1250,422 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) FINISH_ACCEL(); /* setup pixel shader */ - if (pPriv->bicubic_enabled) { - BEGIN_ACCEL(79); - - /* 4 components: 2 for tex0 and 2 for tex1 */ - OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - - /* R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); - - /* Pixel stack frame size. */ - OUT_ACCEL_REG(R300_US_PIXSIZE, 5); - - /* Indirection levels */ - OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | - R300_FIRST_TEX)); - - /* Set nodes. */ - OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(14) | - R300_TEX_CODE_OFFSET(0) | - R300_TEX_CODE_SIZE(6))); - - /* Nodes are allocated highest first, but executed lowest first */ - OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); - OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | - R300_ALU_SIZE(0) | - R300_TEX_START(0) | - R300_TEX_SIZE(0))); - OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | - R300_ALU_SIZE(9) | - R300_TEX_START(1) | - R300_TEX_SIZE(0))); - OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | - R300_ALU_SIZE(2) | - R300_TEX_START(2) | - R300_TEX_SIZE(3) | - R300_RGBA_OUT)); - - /* ** BICUBIC FP ** */ - - /* texcoord0 => temp0 - * texcoord1 => temp1 */ - - // first node - /* TEX temp2, temp1.rrr0, tex1, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(1) | - R300_TEX_SRC_ADDR(1) | - R300_TEX_DST_ADDR(2))); - - /* MOV temp1.r, temp1.ggg0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - - // second node - /* TEX temp1, temp1, tex1, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(1) | - R300_TEX_SRC_ADDR(1) | - R300_TEX_DST_ADDR(1))); - - /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | - R300_ALU_RGB_ADDRD(3) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - - /* MUL temp2.rg, temp2.rrr0, const0.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | - R300_ALU_RGB_ADDRD(2) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR2(3) | - R300_ALU_RGB_ADDRD(4) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_ADDRD(5) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR2(3) | - R300_ALU_RGB_ADDRD(3) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR2(3) | - R300_ALU_RGB_ADDRD(2) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR2(5) | - R300_ALU_RGB_ADDRD(3) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR2(4) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - - // third node - /* TEX temp4, temp1.rg--, tex0, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(0) | - R300_TEX_SRC_ADDR(1) | - R300_TEX_DST_ADDR(4))); - - /* TEX temp3, temp3.rg--, tex0, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(0) | - R300_TEX_SRC_ADDR(3) | - R300_TEX_DST_ADDR(3))); - - /* TEX temp5, temp2.rg--, tex0, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(0) | - R300_TEX_SRC_ADDR(2) | - R300_TEX_DST_ADDR(5))); - - /* TEX temp0, temp0.rg--, tex0, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(0) | - R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(0))); - - /* LRP temp3, temp1.bbbb, temp4, temp3 -> - * - PRESUB temps, temp4 - temp3 - * - MAD temp3, temp1.bbbb, temps, temp3 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | - R300_ALU_RGB_ADDR1(4) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(3) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | - R300_ALU_ALPHA_ADDR1(4) | - R300_ALU_ALPHA_ADDR2(1) | - R300_ALU_ALPHA_ADDRD(3) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); - - /* LRP temp0, temp1.bbbb, temp5, temp0 -> - * - PRESUB temps, temp5 - temp0 - * - MAD temp0, temp1.bbbb, temps, temp0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | - R300_ALU_RGB_INSERT_NOP)); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR1(5) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | - R300_ALU_ALPHA_ADDR1(5) | - R300_ALU_ALPHA_ADDR2(1) | - R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); - - /* LRP output, temp2.bbbb, temp3, temp0 -> - * - PRESUB temps, temp3 - temp0 - * - MAD output, temp2.bbbb, temps, temp0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR1(3) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | - R300_ALU_ALPHA_ADDR1(3) | - R300_ALU_ALPHA_ADDR2(2) | - R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); - - /* Shader constants. */ - OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); - - OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); - - FINISH_ACCEL(); - } else if (isplanar) { - /* - * y' = y - .0625 - * u' = u - .5 - * v' = v - .5; - * - * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' - * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' - * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' - * - * DP3 might look like the straightforward solution - * but we'd need to move the texture yuv values in - * the same reg for this to work. Therefore use MADs. - * Brightness just adds to the off constant. - * Contrast is multiplication of luminance. - * Saturation and hue change the u and v coeffs. - * Default values (before adjustments - depend on colorspace): - * yco = 1.1643 - * uco = 0, -0.39173, 2.017 - * vco = 1.5958, -0.8129, 0 - * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], - * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], - * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], - * - * temp = MAD(yco, yuv.yyyy, off) - * temp = MAD(uco, yuv.uuuu, temp) - * result = MAD(vco, yuv.vvvv, temp) - */ - /* TODO: don't recalc consts always */ - const float Loff = -0.0627; - const float Coff = -0.502; - float uvcosf, uvsinf; - float yco; - float uco[3], vco[3], off[3]; - float bright, cont, gamma; - int ref = pPriv->transform_index; - Bool needgamma = FALSE; + if (pPriv->bicubic_state != BICUBIC_OFF) { + if (pPriv->bicubic_enabled) { + BEGIN_ACCEL(79); + + /* 4 components: 2 for tex0 and 2 for tex1 */ + OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); + + /* Pixel stack frame size. */ + OUT_ACCEL_REG(R300_US_PIXSIZE, 5); + + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | + R300_FIRST_TEX)); + + /* Set nodes. */ + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(14) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(6))); + + /* Nodes are allocated highest first, but executed lowest first */ + OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); + OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | + R300_ALU_SIZE(0) | + R300_TEX_START(0) | + R300_TEX_SIZE(0))); + OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | + R300_ALU_SIZE(9) | + R300_TEX_START(1) | + R300_TEX_SIZE(0))); + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | + R300_ALU_SIZE(2) | + R300_TEX_START(2) | + R300_TEX_SIZE(3) | + R300_RGBA_OUT)); + + /* ** BICUBIC FP ** */ + + /* texcoord0 => temp0 + * texcoord1 => temp1 */ + + // first node + /* TEX temp2, temp1.rrr0, tex1, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(1) | + R300_TEX_SRC_ADDR(1) | + R300_TEX_DST_ADDR(2))); + + /* MOV temp1.r, temp1.ggg0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - cont = RTFContrast(pPriv->contrast); - bright = RTFBrightness(pPriv->brightness); - gamma = (float)pPriv->gamma / 1000.0; - uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); - uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); - /* overlay video also does pre-gamma contrast/sat adjust, should we? */ - yco = trans[ref].RefLuma * cont; - uco[0] = -trans[ref].RefRCr * uvsinf; - uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; - uco[2] = trans[ref].RefBCb * uvcosf; - vco[0] = trans[ref].RefRCr * uvcosf; - vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; - vco[2] = trans[ref].RefBCb * uvsinf; - off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; - off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; - off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; + // second node + /* TEX temp1, temp1, tex1, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(1) | + R300_TEX_SRC_ADDR(1) | + R300_TEX_DST_ADDR(1))); + + /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - if (gamma != 1.0) { - needgamma = TRUE; - /* note: gamma correction is out = in ^ gamma; - gpu can only do LG2/EX2 therefore we transform into - in ^ gamma = 2 ^ (log2(in) * gamma). - Lots of scalar ops, unfortunately (better solution?) - - without gamma that's 3 inst, with gamma it's 10... - could use different gamma factors per channel, - if that's of any use. */ - } - BEGIN_ACCEL(needgamma ? 28 + 33 : 33); - /* 2 components: same 2 for tex0/1/2 */ - OUT_ACCEL_REG(R300_RS_COUNT, - ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - /* R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); - - OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ - - /* Indirection levels */ - OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | - R300_FIRST_TEX)); - - OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | - R300_TEX_CODE_OFFSET(0) | - R300_TEX_CODE_SIZE(3))); - - OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | - R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | - R300_TEX_START(0) | - R300_TEX_SIZE(2) | - R300_RGBA_OUT)); - - /* tex inst */ - OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(2) | - R300_TEX_ID(0) | - R300_TEX_INST(R300_TEX_INST_LD))); - OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(1) | - R300_TEX_ID(1) | - R300_TEX_INST(R300_TEX_INST_LD))); - OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(0) | - R300_TEX_ID(2) | - R300_TEX_INST(R300_TEX_INST_LD))); - - /* ALU inst */ - /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | - R300_ALU_RGB_ADDR1(2) | - R300_ALU_RGB_ADDR2(0) | - R300_ALU_RGB_ADDRD(2) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop, but need to set up alpha source for rgb usage */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | - R300_ALU_ALPHA_ADDR1(2) | - R300_ALU_ALPHA_ADDR2(0) | - R300_ALU_ALPHA_ADDRD(2) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR1(1) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_ADDRD(2) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | - (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | - R300_ALU_RGB_CLAMP)); - /* write alpha 1 */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | - R300_ALU_ALPHA_TARGET_A)); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); - - if (needgamma) { - /* rgb temp0.r = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.r */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + /* MUL temp2.rg, temp2.rrr0, const0.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb temp0.g = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.g */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + + /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR2(3) | + R300_ALU_RGB_ADDRD(4) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb temp0.b = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.b */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + + /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(5) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - /* MUL const1, temp1, temp0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(0) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop, but set up const1 */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | + /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR2(3) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - /* rgb out0.r = op_sop, set up src0 reg */ + /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.r */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - /* rgb out0.g = op_sop, set up src0 reg */ + /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.g */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_RGB_ADDR2(3) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - /* rgb out0.b = op_sop, set up src0 reg */ + /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.b */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + R300_ALU_RGB_ADDR2(5) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - } - - /* Shader constants. */ - /* constant 0: off, yco */ - OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); - /* constant 1: uco */ - OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); - /* constant 2: vco */ - OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); - - FINISH_ACCEL(); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR2(4) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + + // third node + /* TEX temp4, temp1.rg--, tex0, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(0) | + R300_TEX_SRC_ADDR(1) | + R300_TEX_DST_ADDR(4))); + + /* TEX temp3, temp3.rg--, tex0, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(0) | + R300_TEX_SRC_ADDR(3) | + R300_TEX_DST_ADDR(3))); + + /* TEX temp5, temp2.rg--, tex0, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(0) | + R300_TEX_SRC_ADDR(2) | + R300_TEX_DST_ADDR(5))); + + /* TEX temp0, temp0.rg--, tex0, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(0) | + R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0))); + + /* LRP temp3, temp1.bbbb, temp4, temp3 -> + * - PRESUB temps, temp4 - temp3 + * - MAD temp3, temp1.bbbb, temps, temp3 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | + R300_ALU_RGB_ADDR1(4) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | + R300_ALU_ALPHA_ADDR1(4) | + R300_ALU_ALPHA_ADDR2(1) | + R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); + + /* LRP temp0, temp1.bbbb, temp5, temp0 -> + * - PRESUB temps, temp5 - temp0 + * - MAD temp0, temp1.bbbb, temps, temp0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | + R300_ALU_RGB_INSERT_NOP)); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(5) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(5) | + R300_ALU_ALPHA_ADDR2(1) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); + + /* LRP output, temp2.bbbb, temp3, temp0 -> + * - PRESUB temps, temp3 - temp0 + * - MAD output, temp2.bbbb, temps, temp0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(3) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(3) | + R300_ALU_ALPHA_ADDR2(2) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); + + /* Shader constants. */ + OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); + + OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); + + FINISH_ACCEL(); + } else { + BEGIN_ACCEL(11); + /* 2 components: 2 for tex0 */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); + + OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ + + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | + R300_FIRST_TEX)); + + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(1) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(1))); + + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | + R300_ALU_SIZE(0) | + R300_TEX_START(0) | + R300_TEX_SIZE(0) | + R300_RGBA_OUT)); + + /* tex inst */ + OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0) | + R300_TEX_ID(0) | + R300_TEX_INST(R300_TEX_INST_LD))); + + /* ALU inst */ + /* RGB */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | + R300_ALU_RGB_MASK_G | + R300_ALU_RGB_MASK_B)) | + R300_ALU_RGB_TARGET_A)); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_CLAMP)); + /* Alpha */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_TARGET_A | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | + R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | + R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | + R300_ALU_ALPHA_CLAMP)); + FINISH_ACCEL(); + } } else { /* * y' = y - .0625 @@ -1972,220 +1733,446 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) if that's of any use. */ } - BEGIN_ACCEL(needgamma ? 28 + 33 : 33); - /* 2 components */ - OUT_ACCEL_REG(R300_RS_COUNT, - ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - /* R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); - - OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ - - /* Indirection levels */ - OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | - R300_FIRST_TEX)); - - OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | - R300_TEX_CODE_OFFSET(0) | - R300_TEX_CODE_SIZE(1))); - - OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | - R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | - R300_TEX_START(0) | - R300_TEX_SIZE(0) | - R300_RGBA_OUT)); - - /* tex inst */ - OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(0) | - R300_TEX_ID(0) | - R300_TEX_INST(R300_TEX_INST_LD))); - - /* ALU inst */ - /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(0) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop, but need to set up alpha source for rgb usage */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | - R300_ALU_ALPHA_ADDR1(0) | - R300_ALU_ALPHA_ADDR2(0) | - R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | - (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | - R300_ALU_RGB_CLAMP)); - /* write alpha 1 */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | - R300_ALU_ALPHA_TARGET_A)); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); - - if (needgamma) { - /* rgb temp0.r = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.r */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | + if (isplanar) { + BEGIN_ACCEL(needgamma ? 28 + 33 : 33); + /* 2 components: same 2 for tex0/1/2 */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); + + OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ + + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | + R300_FIRST_TEX)); + + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(3))); + + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | + R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | + R300_TEX_START(0) | + R300_TEX_SIZE(2) | + R300_RGBA_OUT)); + + /* tex inst */ + OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(2) | + R300_TEX_ID(0) | + R300_TEX_INST(R300_TEX_INST_LD))); + OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(1) | + R300_TEX_ID(1) | + R300_TEX_INST(R300_TEX_INST_LD))); + OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0) | + R300_TEX_ID(2) | + R300_TEX_INST(R300_TEX_INST_LD))); + + /* ALU inst */ + /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | + R300_ALU_RGB_ADDR1(2) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop, but need to set up alpha source for rgb usage */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | + R300_ALU_ALPHA_ADDR1(2) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(2) | R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - /* rgb temp0.g = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.g */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | + /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR1(1) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - /* rgb temp0.b = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.b */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_CLAMP)); + /* write alpha 1 */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_TARGET_A)); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MUL const1, temp1, temp0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); + + if (needgamma) { + /* rgb temp0.r = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* MUL const1, temp1, temp0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop, but set up const1 */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.r = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + } + } else { + BEGIN_ACCEL(needgamma ? 28 + 33 : 33); + /* 2 components */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); + + OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ + + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | + R300_FIRST_TEX)); + + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(1))); + + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | + R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | + R300_TEX_START(0) | + R300_TEX_SIZE(0) | + R300_RGBA_OUT)); + + /* tex inst */ + OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0) | + R300_TEX_ID(0) | + R300_TEX_INST(R300_TEX_INST_LD))); + + /* ALU inst */ + /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | R300_ALU_RGB_ADDR1(0) | R300_ALU_RGB_ADDR2(0) | - R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_ADDRD(1) | R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop, but set up const1 */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | + /* alpha nop, but need to set up alpha source for rgb usage */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - /* rgb out0.r = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.r */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb out0.g = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.g */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | + /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - /* rgb out0.b = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.b */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_CLAMP)); + /* write alpha 1 */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_TARGET_A)); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); + + if (needgamma) { + /* rgb temp0.r = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* MUL const1, temp1, temp0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop, but set up const1 */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.r = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + } } /* Shader constants. */ @@ -2491,9 +2478,9 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) if (RADEONTilingEnabled(pScrn, pPixmap)) colorpitch |= R300_COLORTILE; - if (!pPriv->bicubic_enabled && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) { + if (((pPriv->bicubic_state == BICUBIC_OFF)) && + (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) isplanar = TRUE; - } if (isplanar) { txformat1 = R300_TX_FORMAT_X8; @@ -2504,7 +2491,7 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) else txformat1 = R300_TX_FORMAT_VYUY422; - if (pPriv->bicubic_enabled) + if (pPriv->bicubic_state != BICUBIC_OFF) txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; /* pitch is in pixels */ @@ -2694,805 +2681,574 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) FINISH_ACCEL(); /* setup pixel shader */ - if (pPriv->bicubic_enabled) { - BEGIN_ACCEL(7); - - /* 4 components: 2 for tex0 and 2 for tex1 */ - OUT_ACCEL_REG(R300_RS_COUNT, - ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - - /* R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); - - /* Pixel stack frame size. */ - OUT_ACCEL_REG(R300_US_PIXSIZE, 5); - - /* FP length. */ - OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | - R500_US_CODE_END_ADDR(13))); - OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | - R500_US_CODE_RANGE_SIZE(13))); - - /* Prepare for FP emission. */ - OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); - OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); - FINISH_ACCEL(); - - BEGIN_ACCEL(89); - /* Pixel shader. - * I've gone ahead and annotated each instruction, since this - * thing is MASSIVE. :3 - * Note: In order to avoid buggies with temps and multiple - * inputs, all temps are offset by 2. temp0 -> register2. */ - - /* TEX temp2, input1.xxxx, tex1, 1D */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | - R500_TEX_INST_LD | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | - R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_R | - R500_TEX_SRC_R_SWIZ_R | - R500_TEX_SRC_Q_SWIZ_R | - R500_TEX_DST_ADDR(2) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* TEX temp5, input1.yyyy, tex1, 1D */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | - R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | - R500_TEX_SRC_S_SWIZ_G | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_SRC_R_SWIZ_G | - R500_TEX_SRC_Q_SWIZ_G | - R500_TEX_DST_ADDR(5) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* MUL temp4, const0.x0x0, temp2.yyxx */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | - R500_RGB_ADDR0_CONST | - R500_RGB_ADDR1(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR0_CONST | - R500_ALPHA_ADDR1(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_0 | - R500_ALU_RGB_B_SWIZ_A_R | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_G | - R500_ALU_RGB_B_SWIZ_B_R)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | - R500_ALPHA_OP_MAD | - R500_ALPHA_SEL_A_SRC0 | - R500_ALPHA_SWIZ_A_0 | - R500_ALPHA_SEL_B_SRC1 | - R500_ALPHA_SWIZ_B_R)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | - R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | - R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0)); - - /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | - R500_RGB_ADDR0_CONST | - R500_RGB_ADDR1(5) | - R500_RGB_ADDR2(4))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR0_CONST | - R500_ALPHA_ADDR1(5) | - R500_ALPHA_ADDR2(4))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_0 | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_0 | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_G_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_R)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | - R500_ALPHA_OP_MAD | - R500_ALPHA_SEL_A_SRC0 | - R500_ALPHA_SWIZ_A_G | - R500_ALPHA_SEL_B_SRC1 | - R500_ALPHA_SWIZ_B_R)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | - R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_SEL_C_SRC2 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_A_SWIZ_A)); - - /* ADD temp3, temp3, input0.xyxy */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | - R500_RGB_ADDR2(0))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | - R500_ALPHA_ADDR2(0))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | - R500_ALU_RGB_G_SWIZ_A_1 | - R500_ALU_RGB_B_SWIZ_A_1 | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_G_SWIZ_B_G | - R500_ALU_RGB_B_SWIZ_B_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | - R500_ALPHA_OP_MAD | - R500_ALPHA_SWIZ_A_1 | - R500_ALPHA_SEL_B_SRC1 | - R500_ALPHA_SWIZ_B_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | - R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_SEL_C_SRC2 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_R | - R500_ALU_RGBA_A_SWIZ_G)); - - /* TEX temp1, temp3.zwxy, tex0, 2D */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | - R500_TEX_INST_LD | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | - R500_TEX_SRC_S_SWIZ_B | - R500_TEX_SRC_T_SWIZ_A | - R500_TEX_SRC_R_SWIZ_R | - R500_TEX_SRC_Q_SWIZ_G | - R500_TEX_DST_ADDR(1) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* TEX temp3, temp3.xyzw, tex0, 2D */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | - R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | - R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_SRC_R_SWIZ_B | - R500_TEX_SRC_Q_SWIZ_A | - R500_TEX_DST_ADDR(3) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | - R500_RGB_ADDR0_CONST | - R500_RGB_ADDR1(5) | - R500_RGB_ADDR2(4))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR0_CONST | - R500_ALPHA_ADDR1(5) | - R500_ALPHA_ADDR2(4))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_0 | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_0 | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_G | - R500_ALU_RGB_B_SWIZ_B_G)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | - R500_ALPHA_OP_MAD | - R500_ALPHA_SEL_A_SRC0 | - R500_ALPHA_SWIZ_A_G | - R500_ALPHA_SEL_B_SRC1 | - R500_ALPHA_SWIZ_B_G)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | - R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_SEL_C_SRC2 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_A_SWIZ_A)); - - /* ADD temp0, temp4, input0.xyxy */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | - R500_RGB_ADDR2(0))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | - R500_ALPHA_ADDR2(0))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | - R500_ALU_RGB_G_SWIZ_A_1 | - R500_ALU_RGB_B_SWIZ_A_1 | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_G_SWIZ_B_G | - R500_ALU_RGB_B_SWIZ_B_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | - R500_ALPHA_OP_MAD | - R500_ALPHA_SWIZ_A_1 | - R500_ALPHA_SEL_B_SRC1 | - R500_ALPHA_SWIZ_B_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | - R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_SEL_C_SRC2 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_R | - R500_ALU_RGBA_A_SWIZ_G)); - - /* TEX temp4, temp0.zwzw, tex0, 2D */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | - R500_TEX_INST_LD | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_B | - R500_TEX_SRC_T_SWIZ_A | - R500_TEX_SRC_R_SWIZ_B | - R500_TEX_SRC_Q_SWIZ_A | - R500_TEX_DST_ADDR(4) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* TEX temp0, temp0.xyzw, tex0, 2D */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | - R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_SRC_R_SWIZ_B | - R500_TEX_SRC_Q_SWIZ_A | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* LRP temp3, temp2.zzzz, temp1, temp3 -> - * - PRESUB temps, temp1 - temp3 - * - MAD temp2.zzzz, temps, temp3 */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | - R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | - R500_RGB_ADDR1(1) | - R500_RGB_ADDR2(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | - R500_ALPHA_SRCP_OP_A1_MINUS_A0 | - R500_ALPHA_ADDR1(1) | - R500_ALPHA_ADDR2(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | - R500_ALU_RGB_R_SWIZ_A_B | - R500_ALU_RGB_G_SWIZ_A_B | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRCP | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_G_SWIZ_B_G | - R500_ALU_RGB_B_SWIZ_B_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | - R500_ALPHA_OP_MAD | - R500_ALPHA_SEL_A_SRC2 | - R500_ALPHA_SWIZ_A_B | - R500_ALPHA_SEL_B_SRCP | - R500_ALPHA_SWIZ_B_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | - R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_SEL_C_SRC0 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_A_SWIZ_A)); - - /* LRP temp0, temp2.zzzz, temp4, temp0 -> - * - PRESUB temps, temp4 - temp1 - * - MAD temp2.zzzz, temps, temp0 */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | - R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | - R500_RGB_ADDR1(4) | - R500_RGB_ADDR2(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | - R500_ALPHA_SRCP_OP_A1_MINUS_A0 | - R500_ALPHA_ADDR1(4) | - R500_ALPHA_ADDR2(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | - R500_ALU_RGB_R_SWIZ_A_B | - R500_ALU_RGB_G_SWIZ_A_B | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRCP | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_G_SWIZ_B_G | - R500_ALU_RGB_B_SWIZ_B_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | - R500_ALPHA_OP_MAD | - R500_ALPHA_SEL_A_SRC2 | - R500_ALPHA_SWIZ_A_B | - R500_ALPHA_SEL_B_SRCP | - R500_ALPHA_SWIZ_B_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | - R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_SEL_C_SRC0 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_A_SWIZ_A)); - - /* LRP output, temp5.zzzz, temp3, temp0 -> - * - PRESUB temps, temp3 - temp0 - * - MAD temp5.zzzz, temps, temp0 */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | - R500_INST_LAST | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | - R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | - R500_RGB_ADDR1(3) | - R500_RGB_ADDR2(5))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | - R500_ALPHA_SRCP_OP_A1_MINUS_A0 | - R500_ALPHA_ADDR1(3) | - R500_ALPHA_ADDR2(5))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | - R500_ALU_RGB_R_SWIZ_A_B | - R500_ALU_RGB_G_SWIZ_A_B | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRCP | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_G_SWIZ_B_G | - R500_ALU_RGB_B_SWIZ_B_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | - R500_ALPHA_OP_MAD | - R500_ALPHA_SEL_A_SRC2 | - R500_ALPHA_SWIZ_A_B | - R500_ALPHA_SEL_B_SRCP | - R500_ALPHA_SWIZ_B_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | - R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_SEL_C_SRC0 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_A_SWIZ_A)); - - /* Shader constants. */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); - - /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); - - FINISH_ACCEL(); - - } else if (isplanar) { - /* - * y' = y - .0625 - * u' = u - .5 - * v' = v - .5; - * - * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' - * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' - * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' - * - * DP3 might look like the straightforward solution - * but we'd need to move the texture yuv values in - * the same reg for this to work. Therefore use MADs. - * Brightness just adds to the off constant. - * Contrast is multiplication of luminance. - * Saturation and hue change the u and v coeffs. - * Default values (before adjustments - depend on colorspace): - * yco = 1.1643 - * uco = 0, -0.39173, 2.017 - * vco = 1.5958, -0.8129, 0 - * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], - * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], - * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], - * - * temp = MAD(yco, yuv.yyyy, off) - * temp = MAD(uco, yuv.uuuu, temp) - * result = MAD(vco, yuv.vvvv, temp) - */ - /* TODO: don't recalc consts always */ - const float Loff = -0.0627; - const float Coff = -0.502; - float uvcosf, uvsinf; - float yco; - float uco[3], vco[3], off[3]; - float bright, cont, gamma; - int ref = pPriv->transform_index; - Bool needgamma = FALSE; - - cont = RTFContrast(pPriv->contrast); - bright = RTFBrightness(pPriv->brightness); - gamma = (float)pPriv->gamma / 1000.0; - uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); - uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); - /* overlay video also does pre-gamma contrast/sat adjust, should we? */ - - yco = trans[ref].RefLuma * cont; - uco[0] = -trans[ref].RefRCr * uvsinf; - uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; - uco[2] = trans[ref].RefBCb * uvcosf; - vco[0] = trans[ref].RefRCr * uvcosf; - vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; - vco[2] = trans[ref].RefBCb * uvsinf; - off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; - off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; - off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; - - //XXX gamma + if (pPriv->bicubic_state != BICUBIC_OFF) { + if (pPriv->bicubic_enabled) { + BEGIN_ACCEL(7); - if (gamma != 1.0) { - needgamma = TRUE; - /* note: gamma correction is out = in ^ gamma; - gpu can only do LG2/EX2 therefore we transform into - in ^ gamma = 2 ^ (log2(in) * gamma). - Lots of scalar ops, unfortunately (better solution?) - - without gamma that's 3 inst, with gamma it's 10... - could use different gamma factors per channel, - if that's of any use. */ + /* 4 components: 2 for tex0 and 2 for tex1 */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6)); + + /* Pixel stack frame size. */ + OUT_ACCEL_REG(R300_US_PIXSIZE, 5); + + /* FP length. */ + OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(13))); + OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | + R500_US_CODE_RANGE_SIZE(13))); + + /* Prepare for FP emission. */ + OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); + OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); + FINISH_ACCEL(); + + BEGIN_ACCEL(89); + /* Pixel shader. + * I've gone ahead and annotated each instruction, since this + * thing is MASSIVE. :3 + * Note: In order to avoid buggies with temps and multiple + * inputs, all temps are offset by 2. temp0 -> register2. */ + + /* TEX temp2, input1.xxxx, tex1, 1D */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | + R500_TEX_INST_LD | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | + R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_R | + R500_TEX_SRC_R_SWIZ_R | + R500_TEX_SRC_Q_SWIZ_R | + R500_TEX_DST_ADDR(2) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* TEX temp5, input1.yyyy, tex1, 1D */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | + R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) | + R500_TEX_SRC_S_SWIZ_G | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_SRC_R_SWIZ_G | + R500_TEX_SRC_Q_SWIZ_G | + R500_TEX_DST_ADDR(5) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* MUL temp4, const0.x0x0, temp2.yyxx */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_ADDR0_CONST | + R500_RGB_ADDR1(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR0_CONST | + R500_ALPHA_ADDR1(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_0 | + R500_ALU_RGB_B_SWIZ_A_R | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_G | + R500_ALU_RGB_B_SWIZ_B_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | + R500_ALPHA_OP_MAD | + R500_ALPHA_SEL_A_SRC0 | + R500_ALPHA_SWIZ_A_0 | + R500_ALPHA_SEL_B_SRC1 | + R500_ALPHA_SWIZ_B_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | + R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0)); + + /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_ADDR0_CONST | + R500_RGB_ADDR1(5) | + R500_RGB_ADDR2(4))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR0_CONST | + R500_ALPHA_ADDR1(5) | + R500_ALPHA_ADDR2(4))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_0 | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_0 | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_G_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | + R500_ALPHA_OP_MAD | + R500_ALPHA_SEL_A_SRC0 | + R500_ALPHA_SWIZ_A_G | + R500_ALPHA_SEL_B_SRC1 | + R500_ALPHA_SWIZ_B_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | + R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_SEL_C_SRC2 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_A_SWIZ_A)); + + /* ADD temp3, temp3, input0.xyxy */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) | + R500_RGB_ADDR2(0))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) | + R500_ALPHA_ADDR2(0))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | + R500_ALU_RGB_G_SWIZ_A_1 | + R500_ALU_RGB_B_SWIZ_A_1 | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_G_SWIZ_B_G | + R500_ALU_RGB_B_SWIZ_B_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | + R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_1 | + R500_ALPHA_SEL_B_SRC1 | + R500_ALPHA_SWIZ_B_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | + R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_SEL_C_SRC2 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_R | + R500_ALU_RGBA_A_SWIZ_G)); + + /* TEX temp1, temp3.zwxy, tex0, 2D */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | + R500_TEX_INST_LD | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | + R500_TEX_SRC_S_SWIZ_B | + R500_TEX_SRC_T_SWIZ_A | + R500_TEX_SRC_R_SWIZ_R | + R500_TEX_SRC_Q_SWIZ_G | + R500_TEX_DST_ADDR(1) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* TEX temp3, temp3.xyzw, tex0, 2D */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | + R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) | + R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_SRC_R_SWIZ_B | + R500_TEX_SRC_Q_SWIZ_A | + R500_TEX_DST_ADDR(3) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_ADDR0_CONST | + R500_RGB_ADDR1(5) | + R500_RGB_ADDR2(4))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR0_CONST | + R500_ALPHA_ADDR1(5) | + R500_ALPHA_ADDR2(4))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_0 | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_0 | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_G | + R500_ALU_RGB_B_SWIZ_B_G)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) | + R500_ALPHA_OP_MAD | + R500_ALPHA_SEL_A_SRC0 | + R500_ALPHA_SWIZ_A_G | + R500_ALPHA_SEL_B_SRC1 | + R500_ALPHA_SWIZ_B_G)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) | + R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_SEL_C_SRC2 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_A_SWIZ_A)); + + /* ADD temp0, temp4, input0.xyxy */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) | + R500_RGB_ADDR2(0))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) | + R500_ALPHA_ADDR2(0))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 | + R500_ALU_RGB_G_SWIZ_A_1 | + R500_ALU_RGB_B_SWIZ_A_1 | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_G_SWIZ_B_G | + R500_ALU_RGB_B_SWIZ_B_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | + R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_1 | + R500_ALPHA_SEL_B_SRC1 | + R500_ALPHA_SWIZ_B_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | + R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_SEL_C_SRC2 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_R | + R500_ALU_RGBA_A_SWIZ_G)); + + /* TEX temp4, temp0.zwzw, tex0, 2D */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | + R500_TEX_INST_LD | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_B | + R500_TEX_SRC_T_SWIZ_A | + R500_TEX_SRC_R_SWIZ_B | + R500_TEX_SRC_Q_SWIZ_A | + R500_TEX_DST_ADDR(4) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* TEX temp0, temp0.xyzw, tex0, 2D */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | + R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_SRC_R_SWIZ_B | + R500_TEX_SRC_Q_SWIZ_A | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* LRP temp3, temp2.zzzz, temp1, temp3 -> + * - PRESUB temps, temp1 - temp3 + * - MAD temp2.zzzz, temps, temp3 */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) | + R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | + R500_RGB_ADDR1(1) | + R500_RGB_ADDR2(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) | + R500_ALPHA_SRCP_OP_A1_MINUS_A0 | + R500_ALPHA_ADDR1(1) | + R500_ALPHA_ADDR2(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | + R500_ALU_RGB_R_SWIZ_A_B | + R500_ALU_RGB_G_SWIZ_A_B | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRCP | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_G_SWIZ_B_G | + R500_ALU_RGB_B_SWIZ_B_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) | + R500_ALPHA_OP_MAD | + R500_ALPHA_SEL_A_SRC2 | + R500_ALPHA_SWIZ_A_B | + R500_ALPHA_SEL_B_SRCP | + R500_ALPHA_SWIZ_B_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) | + R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_SEL_C_SRC0 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_A_SWIZ_A)); + + /* LRP temp0, temp2.zzzz, temp4, temp0 -> + * - PRESUB temps, temp4 - temp1 + * - MAD temp2.zzzz, temps, temp0 */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | + R500_RGB_ADDR1(4) | + R500_RGB_ADDR2(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_SRCP_OP_A1_MINUS_A0 | + R500_ALPHA_ADDR1(4) | + R500_ALPHA_ADDR2(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | + R500_ALU_RGB_R_SWIZ_A_B | + R500_ALU_RGB_G_SWIZ_A_B | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRCP | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_G_SWIZ_B_G | + R500_ALU_RGB_B_SWIZ_B_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | + R500_ALPHA_OP_MAD | + R500_ALPHA_SEL_A_SRC2 | + R500_ALPHA_SWIZ_A_B | + R500_ALPHA_SEL_B_SRCP | + R500_ALPHA_SWIZ_B_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | + R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_SEL_C_SRC0 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_A_SWIZ_A)); + + /* LRP output, temp5.zzzz, temp3, temp0 -> + * - PRESUB temps, temp3 - temp0 + * - MAD temp5.zzzz, temps, temp0 */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | + R500_INST_LAST | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 | + R500_RGB_ADDR1(3) | + R500_RGB_ADDR2(5))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_SRCP_OP_A1_MINUS_A0 | + R500_ALPHA_ADDR1(3) | + R500_ALPHA_ADDR2(5))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 | + R500_ALU_RGB_R_SWIZ_A_B | + R500_ALU_RGB_G_SWIZ_A_B | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRCP | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_G_SWIZ_B_G | + R500_ALU_RGB_B_SWIZ_B_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) | + R500_ALPHA_OP_MAD | + R500_ALPHA_SEL_A_SRC2 | + R500_ALPHA_SWIZ_A_B | + R500_ALPHA_SEL_B_SRCP | + R500_ALPHA_SWIZ_B_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) | + R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_SEL_C_SRC0 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_A_SWIZ_A)); + + /* Shader constants. */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); + + /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */ + OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w)); + OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h)); + OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); + OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0); + + FINISH_ACCEL(); + } else { + BEGIN_ACCEL(19); + /* 2 components: 2 for tex0 */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); + + /* Pixel stack frame size. */ + OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ + + /* FP length. */ + OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(1))); + OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | + R500_US_CODE_RANGE_SIZE(1))); + + /* Prepare for FP emission. */ + OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); + OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); + + /* tex inst */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | + R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* ALU inst */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0)); + FINISH_ACCEL(); } - - BEGIN_ACCEL(56); - /* 2 components: 2 for tex0 */ - OUT_ACCEL_REG(R300_RS_COUNT, - ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - - /* R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); - - /* Pixel stack frame size. */ - OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ - - /* FP length. */ - OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | - R500_US_CODE_END_ADDR(5))); - OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | - R500_US_CODE_RANGE_SIZE(5))); - - /* Prepare for FP emission. */ - OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); - OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); - - /* tex inst */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | - R500_TEX_INST_LD | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_DST_ADDR(2) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | - R500_DX_S_SWIZ_R | - R500_DX_T_SWIZ_R | - R500_DX_R_SWIZ_R | - R500_DX_Q_SWIZ_R | - R500_DY_ADDR(0) | - R500_DY_S_SWIZ_R | - R500_DY_T_SWIZ_R | - R500_DY_R_SWIZ_R | - R500_DY_Q_SWIZ_R)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* tex inst */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | - R500_TEX_INST_LD | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_DST_ADDR(1) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | - R500_DX_S_SWIZ_R | - R500_DX_T_SWIZ_R | - R500_DX_R_SWIZ_R | - R500_DX_Q_SWIZ_R | - R500_DY_ADDR(0) | - R500_DY_S_SWIZ_R | - R500_DY_T_SWIZ_R | - R500_DY_R_SWIZ_R | - R500_DY_Q_SWIZ_R)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* tex inst */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | - R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | - R500_DX_S_SWIZ_R | - R500_DX_T_SWIZ_R | - R500_DX_R_SWIZ_R | - R500_DX_Q_SWIZ_R | - R500_DY_ADDR(0) | - R500_DY_S_SWIZ_R | - R500_DY_T_SWIZ_R | - R500_DY_R_SWIZ_R | - R500_DY_Q_SWIZ_R)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* ALU inst */ - /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | - R500_RGB_ADDR0_CONST | - R500_RGB_ADDR1(2) | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR0_CONST | - R500_ALPHA_ADDR1(2) | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_A | - R500_ALU_RGB_G_SWIZ_A_A | - R500_ALU_RGB_B_SWIZ_A_A | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | - R500_ALPHA_ADDRD(2) | - R500_ALPHA_SWIZ_A_0 | - R500_ALPHA_SWIZ_B_0)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_ADDRD(2) | - R500_ALU_RGBA_SEL_C_SRC0 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | - R500_ALU_RGBA_A_SWIZ_0)); - - /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | - R500_RGB_ADDR0_CONST | - R500_RGB_ADDR1(1) | - R500_RGB_ADDR2(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | - R500_ALPHA_ADDR0_CONST | - R500_ALPHA_ADDR1(1) | - R500_ALPHA_ADDR2(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | - R500_ALPHA_ADDRD(2) | - R500_ALPHA_SWIZ_A_0 | - R500_ALPHA_SWIZ_B_0)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_ADDRD(2) | - R500_ALU_RGBA_SEL_C_SRC2 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | - R500_ALU_RGBA_A_SWIZ_0)); - - /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | - R500_RGB_ADDR0_CONST | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR2(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | - R500_ALPHA_ADDR0_CONST | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR2(2))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | - R500_ALPHA_ADDRD(0) | - R500_ALPHA_SWIZ_A_0 | - R500_ALPHA_SWIZ_B_0)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_ADDRD(0) | - R500_ALU_RGBA_SEL_C_SRC2 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | - R500_ALU_RGBA_A_SWIZ_1)); - - /* Shader constants. */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); - - /* constant 0: off, yco */ - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco); - /* constant 1: uco */ - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma); - /* constant 2: vco */ - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]); - OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0); - - FINISH_ACCEL(); - } else { /* * y' = y - .0625 @@ -3562,175 +3318,414 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) if that's of any use. */ } - BEGIN_ACCEL(44); - /* 2 components: 2 for tex0/1/2 */ - OUT_ACCEL_REG(R300_RS_COUNT, - ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - - /* R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); - - /* Pixel stack frame size. */ - OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ - - /* FP length. */ - OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | - R500_US_CODE_END_ADDR(3))); - OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | - R500_US_CODE_RANGE_SIZE(3))); - - /* Prepare for FP emission. */ - OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); - OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); - - /* tex inst */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | - R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | - R500_TEX_IGNORE_UNCOVERED)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | - R500_DX_S_SWIZ_R | - R500_DX_T_SWIZ_R | - R500_DX_R_SWIZ_R | - R500_DX_Q_SWIZ_R | - R500_DY_ADDR(0) | - R500_DY_S_SWIZ_R | - R500_DY_T_SWIZ_R | - R500_DY_R_SWIZ_R | - R500_DY_Q_SWIZ_R)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); - - /* ALU inst */ - /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | - R500_RGB_ADDR0_CONST | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR0_CONST | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_A | - R500_ALU_RGB_G_SWIZ_A_A | - R500_ALU_RGB_B_SWIZ_A_A | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_G | - R500_ALU_RGB_B_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_G)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | - R500_ALPHA_ADDRD(1) | - R500_ALPHA_SWIZ_A_0 | - R500_ALPHA_SWIZ_B_0)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_ADDRD(1) | - R500_ALU_RGBA_SEL_C_SRC0 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | - R500_ALU_RGBA_A_SWIZ_0)); - - /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | - R500_RGB_ADDR0_CONST | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR2(1))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | - R500_ALPHA_ADDR0_CONST | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR2(1))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_B | - R500_ALU_RGB_B_SWIZ_B_B | - R500_ALU_RGB_G_SWIZ_B_B)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | - R500_ALPHA_ADDRD(1) | - R500_ALPHA_SWIZ_A_0 | - R500_ALPHA_SWIZ_B_0)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_ADDRD(1) | - R500_ALU_RGBA_SEL_C_SRC2 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | - R500_ALU_RGBA_A_SWIZ_0)); - - /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | - R500_RGB_ADDR0_CONST | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR2(1))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | - R500_ALPHA_ADDR0_CONST | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR2(1))); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC1 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_R | - R500_ALU_RGB_G_SWIZ_B_R)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | - R500_ALPHA_ADDRD(1) | - R500_ALPHA_SWIZ_A_0 | - R500_ALPHA_SWIZ_B_0)); - OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_ADDRD(1) | - R500_ALU_RGBA_SEL_C_SRC2 | - R500_ALU_RGBA_R_SWIZ_R | - R500_ALU_RGBA_G_SWIZ_G | - R500_ALU_RGBA_B_SWIZ_B | - R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | - R500_ALU_RGBA_A_SWIZ_1)); + if (isplanar) { + BEGIN_ACCEL(56); + /* 2 components: 2 for tex0 */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); + + /* Pixel stack frame size. */ + OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ + + /* FP length. */ + OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(5))); + OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | + R500_US_CODE_RANGE_SIZE(5))); + + /* Prepare for FP emission. */ + OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); + OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); + + /* tex inst */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | + R500_TEX_INST_LD | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(2) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* tex inst */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) | + R500_TEX_INST_LD | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(1) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* tex inst */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) | + R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* ALU inst */ + /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_ADDR0_CONST | + R500_RGB_ADDR1(2) | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR0_CONST | + R500_ALPHA_ADDR1(2) | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_A | + R500_ALU_RGB_G_SWIZ_A_A | + R500_ALU_RGB_B_SWIZ_A_A | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | + R500_ALPHA_ADDRD(2) | + R500_ALPHA_SWIZ_A_0 | + R500_ALPHA_SWIZ_B_0)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_ADDRD(2) | + R500_ALU_RGBA_SEL_C_SRC0 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | + R500_ALU_RGBA_A_SWIZ_0)); + + /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | + R500_RGB_ADDR0_CONST | + R500_RGB_ADDR1(1) | + R500_RGB_ADDR2(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | + R500_ALPHA_ADDR0_CONST | + R500_ALPHA_ADDR1(1) | + R500_ALPHA_ADDR2(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | + R500_ALPHA_ADDRD(2) | + R500_ALPHA_SWIZ_A_0 | + R500_ALPHA_SWIZ_B_0)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_ADDRD(2) | + R500_ALU_RGBA_SEL_C_SRC2 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | + R500_ALU_RGBA_A_SWIZ_0)); + + /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | + R500_RGB_ADDR0_CONST | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR2(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) | + R500_ALPHA_ADDR0_CONST | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR2(2))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | + R500_ALPHA_ADDRD(0) | + R500_ALPHA_SWIZ_A_0 | + R500_ALPHA_SWIZ_B_0)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_ADDRD(0) | + R500_ALU_RGBA_SEL_C_SRC2 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | + R500_ALU_RGBA_A_SWIZ_1)); + + } else { + BEGIN_ACCEL(44); + /* 2 components: 2 for tex0/1/2 */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6)); + + /* Pixel stack frame size. */ + OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ + + /* FP length. */ + OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(3))); + OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) | + R500_US_CODE_RANGE_SIZE(3))); + + /* Prepare for FP emission. */ + OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0); + OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0)); + + /* tex inst */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) | + R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | + R500_TEX_IGNORE_UNCOVERED)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); + + /* ALU inst */ + /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) | + R500_RGB_ADDR0_CONST | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR0_CONST | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_A | + R500_ALU_RGB_G_SWIZ_A_A | + R500_ALU_RGB_B_SWIZ_A_A | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_G | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_G)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | + R500_ALPHA_ADDRD(1) | + R500_ALPHA_SWIZ_A_0 | + R500_ALPHA_SWIZ_B_0)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_ADDRD(1) | + R500_ALU_RGBA_SEL_C_SRC0 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | + R500_ALU_RGBA_A_SWIZ_0)); + + /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) | + R500_RGB_ADDR0_CONST | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR2(1))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | + R500_ALPHA_ADDR0_CONST | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR2(1))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_B | + R500_ALU_RGB_B_SWIZ_B_B | + R500_ALU_RGB_G_SWIZ_B_B)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | + R500_ALPHA_ADDRD(1) | + R500_ALPHA_SWIZ_A_0 | + R500_ALPHA_SWIZ_B_0)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_ADDRD(1) | + R500_ALU_RGBA_SEL_C_SRC2 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | + R500_ALU_RGBA_A_SWIZ_0)); + + /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) | + R500_RGB_ADDR0_CONST | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR2(1))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) | + R500_ALPHA_ADDR0_CONST | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR2(1))); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC1 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_R | + R500_ALU_RGB_G_SWIZ_B_R)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD | + R500_ALPHA_ADDRD(1) | + R500_ALPHA_SWIZ_A_0 | + R500_ALPHA_SWIZ_B_0)); + OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_ADDRD(1) | + R500_ALU_RGBA_SEL_C_SRC2 | + R500_ALU_RGBA_R_SWIZ_R | + R500_ALU_RGBA_G_SWIZ_G | + R500_ALU_RGBA_B_SWIZ_B | + R500_ALU_RGBA_ALPHA_SEL_C_SRC0 | + R500_ALU_RGBA_A_SWIZ_1)); + } /* Shader constants. */ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0)); |