summary refs log tree commit diff
path: root/src/evergreen_shader.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/evergreen_shader.c')
-rw-r--r-- src/evergreen_shader.c 596
1 files changed, 383 insertions, 213 deletions
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
index ebc58f21..4852578e 100644
--- a/src/evergreen_shader.c
+++ b/src/evergreen_shader.c
@@ -2472,15 +2472,16 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
}
/* comp ps --------------------------------------- */
-int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t *shader)
{
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(3),
+ /* call interp-fetch-mask if boolean1 == true */
+ shader[i++] = CF_DWORD0(ADDR(11),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
+ CF_CONST(1),
COND(SQ_CF_COND_BOOL),
I_COUNT(0),
VALID_PIXEL_MODE(0),
@@ -2488,11 +2489,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_CALL),
WHOLE_QUAD_MODE(0),
BARRIER(0));
+
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(8),
+ /* call read-constant-mask if boolean1 == false */
+ shader[i++] = CF_DWORD0(ADDR(14),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
+ CF_CONST(1),
COND(SQ_CF_COND_NOT_BOOL),
I_COUNT(0),
VALID_PIXEL_MODE(0),
@@ -2500,48 +2503,118 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_CALL),
WHOLE_QUAD_MODE(0),
BARRIER(0));
+
/* 2 */
- shader[i++] = CF_DWORD0(ADDR(0),
- JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ /* call interp-fetch-src if boolean0 == true */
+ shader[i++] = CF_DWORD0(ADDR(6),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
+ COND(SQ_CF_COND_BOOL),
I_COUNT(0),
VALID_PIXEL_MODE(0),
- END_OF_PROGRAM(1),
- CF_INST(SQ_CF_INST_NOP),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
WHOLE_QUAD_MODE(0),
- BARRIER(1));
+ BARRIER(0));
- /* 3 - mask sub */
- shader[i++] = CF_ALU_DWORD0(ADDR(12),
+ /* 3 */
+ /* call read-constant-src if boolean0 == false */
+ shader[i++] = CF_DWORD0(ADDR(9),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 4 */
+ /* src IN mask (GPR0 := GPR0 .* GPR1) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(16),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(8),
+ I_COUNT(4),
ALT_CONST(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 4 */
- shader[i++] = CF_DWORD0(ADDR(28),
+ /* 5 */
+ /* export pixel data */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* subroutine interp-fetch-src */
+
+ /* 6 */
+ /* interpolate src */
+ shader[i++] = CF_ALU_DWORD0(ADDR(20),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 7 */
+ /* texture fetch src into GPR0 */
+ shader[i++] = CF_DWORD0(ADDR(24),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
VALID_PIXEL_MODE(0),
END_OF_PROGRAM(0),
CF_INST(SQ_CF_INST_TC),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 5 */
- shader[i++] = CF_ALU_DWORD0(ADDR(20),
+ /* 8 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+
+ /* subroutine read-constant-src */
+
+ /* 9 */
+ /* read constants into GPR0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(26),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
@@ -2549,30 +2622,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
I_COUNT(4),
- ALT_CONST(0),
+ ALT_CONST(1),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 6 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(2),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
-
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- BURST_COUNT(1),
- VALID_PIXEL_MODE(0),
- END_OF_PROGRAM(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- MARK(0),
- BARRIER(1));
- /* 7 */
+ /* 10 */
+ /* return */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2583,10 +2639,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
END_OF_PROGRAM(0),
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
- BARRIER(1));
+ BARRIER(0));
+
+ /* subroutine interp-fetch-mask */
- /* 8 - non-mask sub */
- shader[i++] = CF_ALU_DWORD0(ADDR(24),
+ /* 11 */
+ /* interpolate mask */
+ shader[i++] = CF_ALU_DWORD0(ADDR(30),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -2598,8 +2657,10 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 9 */
- shader[i++] = CF_DWORD0(ADDR(32),
+
+ /* 12 */
+ /* texture fetch mask into GPR1 */
+ shader[i++] = CF_DWORD0(ADDR(34),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -2611,25 +2672,39 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 10 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(0),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- BURST_COUNT(1),
- VALID_PIXEL_MODE(0),
- END_OF_PROGRAM(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- MARK(0),
- BARRIER(1));
+ /* 13 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
- /* 11 */
+ /* subroutine read-constant-mask */
+
+ /* 14 */
+ /* read constants into GPR1 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(36),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(1),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 15 */
+ /* return */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2640,18 +2715,21 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
END_OF_PROGRAM(0),
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
- BARRIER(1));
+ BARRIER(0));
- /* 12 interpolate src tex coords - mask */
+ /* ALU clauses */
+
+ /* 16 */
+ /* MUL gpr[0].x gpr[0].x gpr[1].x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2660,22 +2738,24 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(0));
- /* 13 */
+ CLAMP(1));
+
+ /* 17 */
+ /* MUL gpr[0].y gpr[0].y gpr[1].y */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2684,67 +2764,70 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(0));
- /* 14 */
+ CLAMP(1));
+ /* 18 */
+ /* MUL gpr[0].z gpr[0].z gpr[1].z */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(0));
- /* 15 */
+ CLAMP(1));
+ /* 19 */
+ /* MUL gpr[0].w gpr[0].w gpr[1].w */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(0));
+ CLAMP(1));
- /* 16 interpolate mask tex coords */
+ /* 20 */
+ /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2763,12 +2846,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 17 */
+ /* 21 */
+ /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2787,12 +2871,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 18 */
+ /* 22 */
+ /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2811,12 +2896,14 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 19 */
+
+ /* 23 */
+ /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2836,17 +2923,49 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 20 - alu 0 */
- /* MUL gpr[2].x gpr[0].x gpr[1].x */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 24/25 */
+ /* SAMPLE RID=0 GPR0, GPR0 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ /* 26 */
+ /* MOV GPR0.x, KC4.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2855,23 +2974,24 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 21 - alu 1 */
- /* MUL gpr[2].y gpr[0].y gpr[1].y */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 27 */
+ /* MOV GPR0.y, KC4.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2880,23 +3000,24 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 22 - alu 2 */
- /* MUL gpr[2].z gpr[0].z gpr[1].z */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 28 */
+ /* MOV GPR0.z, KC4.z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2905,23 +3026,24 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 23 - alu 3 */
- /* MUL gpr[2].w gpr[0].w gpr[1].w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 29 */
+ /* MOV GPR0.w, KC4.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2930,19 +3052,20 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- /* 24 - interpolate tex coords - non-mask */
+ /* 30 */
+ /* INTERP_XY GPR1.x, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2957,16 +3080,17 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 25 */
+ /* 31 */
+ /* INTERP_XY GPR1.y, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2981,16 +3105,17 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 26 */
+ /* 32 */
+ /* INTERP_XY GPR1.z, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3005,16 +3130,17 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 27 */
+ /* 33 */
+ /* INTERP_XY GPR1.w, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3029,16 +3155,17 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 28/29 - src - mask */
+ /* 34/35 */
+ /* SAMPLE RID=1 GPR1, GPR1 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
INST_MOD(0),
FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
+ RESOURCE_ID(1),
SRC_GPR(1),
SRC_REL(ABSOLUTE),
ALT_CONST(0),
@@ -3058,36 +3185,6 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD2(OFFSET_X(0),
OFFSET_Y(0),
OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
- /* 30/31 - mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
SAMPLER_ID(1),
SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
@@ -3095,36 +3192,109 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 32/33 - src - non-mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
+ /* 36 */
+ /* MOV GPR1.x, KC5.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 37 */
+ /* MOV GPR1.y, KC5.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+
+ /* 38 */
+ /* MOV GPR1.z, KC5.z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+
+ /* 39 */
+ /* MOV GPR1.w, KC5.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
return i;
}