summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTan Hu <tan.hu@zte.com.cn>2016-05-27 17:05:14 +0800
committerMichel Dänzer <michel@daenzer.net>2016-06-01 18:17:16 +0900
commit9b9ad669c748f53247e53fa3f3b03a77da5e5cb3 (patch)
treeb4068b0f3177f19b561f0695572f5e0bebcee63a
parentaa07b365d7b0610411e118f105e49daff5f5a5cf (diff)
EXA/6xx/7xx: fast solid pixmap support
Solid pixmaps are currently implemented with scratch pixmaps, which is slow. This replaces the hack with a proper implementation. The Composite shader can now either sample a src/mask or use a constant value. r6xx still be used on some machine, Ported from commit 94d0d14914a025525a0766669b556eaa6681def7. Signed-off-by: Tan Hu <tan.hu@zte.com.cn> Reviewed-by: Grigori Goronzy <greg@chown.ath.cx>
-rw-r--r--src/r600_exa.c257
-rw-r--r--src/r600_shader.c418
2 files changed, 526 insertions, 149 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 8d11ce71..10df4eca 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1165,6 +1165,134 @@ static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP
}
+static void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0;
+
+ uint32_t w = (fg >> 24) & 0xff;
+ uint32_t z = (fg >> 16) & 0xff;
+ uint32_t y = (fg >> 8) & 0xff;
+ uint32_t x = (fg >> 0) & 0xff;
+ float xf = (float)x / 255; /* R */
+ float yf = (float)y / 255; /* G */
+ float zf = (float)z / 255; /* B */
+ float wf = (float)w / 255; /* A */
+
+ /* component swizzles */
+ switch (format) {
+ case PICT_a1r5g5b5:
+ case PICT_a8r8g8b8:
+ pix_r = zf; /* R */
+ pix_g = yf; /* G */
+ pix_b = xf; /* B */
+ pix_a = wf; /* A */
+ break;
+ case PICT_a8b8g8r8:
+ pix_r = xf; /* R */
+ pix_g = yf; /* G */
+ pix_b = zf; /* B */
+ pix_a = wf; /* A */
+ break;
+ case PICT_x8b8g8r8:
+ pix_r = xf; /* R */
+ pix_g = yf; /* G */
+ pix_b = zf; /* B */
+ pix_a = 1.0; /* A */
+ break;
+ case PICT_b8g8r8a8:
+ pix_r = yf; /* R */
+ pix_g = zf; /* G */
+ pix_b = wf; /* B */
+ pix_a = xf; /* A */
+ break;
+ case PICT_b8g8r8x8:
+ pix_r = yf; /* R */
+ pix_g = zf; /* G */
+ pix_b = wf; /* B */
+ pix_a = 1.0; /* A */
+ break;
+ case PICT_x1r5g5b5:
+ case PICT_x8r8g8b8:
+ case PICT_r5g6b5:
+ pix_r = zf; /* R */
+ pix_g = yf; /* G */
+ pix_b = xf; /* B */
+ pix_a = 1.0; /* A */
+ break;
+ case PICT_a8:
+ pix_r = 0.0; /* R */
+ pix_g = 0.0; /* G */
+ pix_b = 0.0; /* B */
+ pix_a = xf; /* A */
+ break;
+ default:
+ ErrorF("Bad format 0x%x\n", format);
+ }
+
+ if (unit == 0) {
+ if (!accel_state->msk_pic) {
+ if (PICT_FORMAT_RGB(format) == 0) {
+ pix_r = 0.0;
+ pix_g = 0.0;
+ pix_b = 0.0;
+ }
+
+ if (PICT_FORMAT_A(format) == 0)
+ pix_a = 1.0;
+ } else {
+ if (accel_state->component_alpha) {
+ if (accel_state->src_alpha) {
+ if (PICT_FORMAT_A(format) == 0) {
+ pix_r = 1.0;
+ pix_g = 1.0;
+ pix_b = 1.0;
+ pix_a = 1.0;
+ } else {
+ pix_r = pix_a;
+ pix_g = pix_a;
+ pix_b = pix_a;
+ }
+ } else {
+ if (PICT_FORMAT_A(format) == 0)
+ pix_a = 1.0;
+ }
+ } else {
+ if (PICT_FORMAT_RGB(format) == 0) {
+ pix_r = 0;
+ pix_g = 0;
+ pix_b = 0;
+ }
+
+ if (PICT_FORMAT_A(format) == 0)
+ pix_a = 1.0;
+ }
+ }
+ } else {
+ if (accel_state->component_alpha) {
+ if (PICT_FORMAT_A(format) == 0)
+ pix_a = 1.0;
+ } else {
+ if (PICT_FORMAT_A(format) == 0) {
+ pix_r = 1.0;
+ pix_g = 1.0;
+ pix_b = 1.0;
+ pix_a = 1.0;
+ } else {
+ pix_r = pix_a;
+ pix_g = pix_a;
+ pix_b = pix_a;
+ }
+ }
+ }
+
+ buf[0] = pix_r;
+ buf[1] = pix_g;
+ buf[2] = pix_b;
+ buf[3] = pix_a;
+}
+
static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
PicturePtr pMaskPicture, PicturePtr pDstPicture,
PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
@@ -1177,31 +1305,27 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
cb_config_t cb_conf;
shader_config_t vs_conf, ps_conf;
struct r600_accel_object src_obj, mask_obj, dst_obj;
+ uint32_t ps_bool_consts = 0;
+ float ps_alu_consts[8];
if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
return FALSE;
- if (!pSrc) {
- pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
- if (!pSrc)
- RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
+ if (pSrc) {
+ src_obj.bo = radeon_get_pixmap_bo(pSrc);
+ src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
+ src_obj.surface = radeon_get_pixmap_surface(pSrc);
+ src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+ src_obj.width = pSrc->drawable.width;
+ src_obj.height = pSrc->drawable.height;
+ src_obj.bpp = pSrc->drawable.bitsPerPixel;
+ src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
}
dst_obj.bo = radeon_get_pixmap_bo(pDst);
- src_obj.bo = radeon_get_pixmap_bo(pSrc);
dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
- src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
dst_obj.surface = radeon_get_pixmap_surface(pDst);
- src_obj.surface = radeon_get_pixmap_surface(pSrc);
-
- src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
-
- src_obj.width = pSrc->drawable.width;
- src_obj.height = pSrc->drawable.height;
- src_obj.bpp = pSrc->drawable.bitsPerPixel;
- src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-
dst_obj.width = pDst->drawable.width;
dst_obj.height = pDst->drawable.height;
dst_obj.bpp = pDst->drawable.bitsPerPixel;
@@ -1211,34 +1335,17 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
if (pMaskPicture) {
- if (!pMask) {
- pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
- if (!pMask) {
- if (!pSrcPicture->pDrawable)
- pScreen->DestroyPixmap(pSrc);
- RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
- }
+ if (pMask) {
+ mask_obj.bo = radeon_get_pixmap_bo(pMask);
+ mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
+ mask_obj.surface = radeon_get_pixmap_surface(pMask);
+ mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
+ mask_obj.width = pMask->drawable.width;
+ mask_obj.height = pMask->drawable.height;
+ mask_obj.bpp = pMask->drawable.bitsPerPixel;
+ mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
}
- mask_obj.bo = radeon_get_pixmap_bo(pMask);
- mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
- mask_obj.surface = radeon_get_pixmap_surface(pMask);
-
- mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
-
- mask_obj.width = pMask->drawable.width;
- mask_obj.height = pMask->drawable.height;
- mask_obj.bpp = pMask->drawable.bitsPerPixel;
- mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-
- if (!R600SetAccelState(pScrn,
- &src_obj,
- &mask_obj,
- &dst_obj,
- accel_state->comp_vs_offset, accel_state->comp_ps_offset,
- 3, 0xffffffff))
- return FALSE;
-
accel_state->msk_pic = pMaskPicture;
if (pMaskPicture->componentAlpha) {
accel_state->component_alpha = TRUE;
@@ -1251,19 +1358,19 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
accel_state->src_alpha = FALSE;
}
} else {
- if (!R600SetAccelState(pScrn,
- &src_obj,
- NULL,
- &dst_obj,
- accel_state->comp_vs_offset, accel_state->comp_ps_offset,
- 3, 0xffffffff))
- return FALSE;
-
accel_state->msk_pic = NULL;
accel_state->component_alpha = FALSE;
accel_state->src_alpha = FALSE;
}
+ if (!R600SetAccelState(pScrn,
+ pSrc ? &src_obj : NULL,
+ (pMaskPicture && pMask) ? &mask_obj : NULL,
+ &dst_obj,
+ accel_state->comp_vs_offset, accel_state->comp_ps_offset,
+ 3, 0xffffffff))
+ return FALSE;
+
if (!R600GetDestFormat(pDstPicture, &dst_format))
return FALSE;
@@ -1284,10 +1391,13 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
- R600IBDiscard(pScrn);
- return FALSE;
- }
+ if (pSrc) {
+ if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
+ R600IBDiscard(pScrn);
+ return FALSE;
+ }
+ } else
+ accel_state->is_transform[0] = FALSE;
if (pMask) {
if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
@@ -1297,12 +1407,16 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
} else
accel_state->is_transform[1] = FALSE;
+ if (pSrc)
+ ps_bool_consts |= (1 << 0);
+ if (pMask)
+ ps_bool_consts |= (1 << 1);
+ r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
+
if (pMask) {
r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
- r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
} else {
r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
- r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
}
/* Shader */
@@ -1315,7 +1429,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
ps_conf.shader_addr = accel_state->ps_mc_addr;
ps_conf.shader_size = accel_state->ps_size;
- ps_conf.num_gprs = 3;
+ ps_conf.num_gprs = 2;
ps_conf.stack_size = 1;
ps_conf.uncached_first_inst = 1;
ps_conf.clamp_consts = 0;
@@ -1381,6 +1495,27 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
else
r600_set_spi(pScrn, (1 - 1), 1);
+ if (!pSrc) {
+ /* solid src color */
+ R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format,
+ pSrcPicture->pSourcePict->solidFill.color, 0);
+ }
+
+ if (!pMaskPicture) {
+ /* use identity constant if there is no mask */
+ ps_alu_consts[4] = 1.0;
+ ps_alu_consts[5] = 1.0;
+ ps_alu_consts[6] = 1.0;
+ ps_alu_consts[7] = 1.0;
+ } else if (!pMask) {
+ /* solid mask color */
+ R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format,
+ pMaskPicture->pSourcePict->solidFill.color, 1);
+ }
+
+ r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
+ sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+
if (accel_state->vsync)
RADEONVlineHelperClear(pScrn);
@@ -1405,7 +1540,7 @@ static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
accel_state->vline_y1,
accel_state->vline_y2);
- vtx_size = accel_state->msk_pic ? 24 : 16;
+ vtx_size = accel_state->msk_pix ? 24 : 16;
r600_finish_op(pScrn, vtx_size);
}
@@ -1418,12 +1553,6 @@ static void R600DoneComposite(PixmapPtr pDst)
struct radeon_accel_state *accel_state = info->accel_state;
R600FinishComposite(pScrn, pDst, accel_state);
-
- if (!accel_state->src_pic->pDrawable)
- pScreen->DestroyPixmap(accel_state->src_pix);
-
- if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
- pScreen->DestroyPixmap(accel_state->msk_pix);
}
static void R600Composite(PixmapPtr pDst,
@@ -1455,7 +1584,7 @@ static void R600Composite(PixmapPtr pDst,
if (accel_state->vsync)
RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
- if (accel_state->msk_pic) {
+ if (accel_state->msk_pix) {
vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 4cb2fc89..26a6ab64 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -2318,9 +2318,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(3));
+ /* call fetch-mask if boolean1 == true */
+ shader[i++] = CF_DWORD0(ADDR(10));
shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
+ CF_CONST(1),
COND(SQ_CF_COND_BOOL),
I_COUNT(0),
CALL_COUNT(0),
@@ -2330,9 +2331,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(7));
+ /* call read-constant-mask if boolean1 == false */
+ shader[i++] = CF_DWORD0(ADDR(12));
shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
+ CF_CONST(1),
COND(SQ_CF_COND_NOT_BOOL),
I_COUNT(0),
CALL_COUNT(0),
@@ -2342,33 +2344,36 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 2 */
- shader[i++] = CF_DWORD0(ADDR(0));
+ /* call fetch-src if boolean0 == true */
+ shader[i++] = CF_DWORD0(ADDR(6));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
+ COND(SQ_CF_COND_BOOL),
I_COUNT(0),
CALL_COUNT(0),
- END_OF_PROGRAM(1),
+ END_OF_PROGRAM(0),
VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_NOP),
+ CF_INST(SQ_CF_INST_CALL),
WHOLE_QUAD_MODE(0),
- BARRIER(1));
+ BARRIER(0));
- /* 3 - mask sub */
- shader[i++] = CF_DWORD0(ADDR(14));
+ /* 3 */
+ /* call read-constant-src if boolean0 == false */
+ shader[i++] = CF_DWORD0(ADDR(8));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
CALL_COUNT(0),
END_OF_PROGRAM(0),
VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_TEX),
+ CF_INST(SQ_CF_INST_CALL),
WHOLE_QUAD_MODE(0),
- BARRIER(1));
+ BARRIER(0));
/* 4 */
- shader[i++] = CF_ALU_DWORD0(ADDR(10),
+ /* src IN mask (GPR0 := GPR1 .* GPR0) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(14),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -2382,9 +2387,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
BARRIER(1));
/* 5 */
+ /* export pixel data */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(2),
+ RW_GPR(0),
RW_REL(ABSOLUTE),
INDEX_GPR(0),
ELEM_SIZE(1));
@@ -2394,55 +2400,57 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_W(SQ_SEL_W),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
- END_OF_PROGRAM(0),
+ END_OF_PROGRAM(1),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
+ /* subroutine fetch src */
/* 6 */
- shader[i++] = CF_DWORD0(ADDR(0));
+ /* fetch src into GPR0*/
+ shader[i++] = CF_DWORD0(ADDR(26));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(0),
+ I_COUNT(1),
CALL_COUNT(0),
END_OF_PROGRAM(0),
VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_RETURN),
+ CF_INST(SQ_CF_INST_TEX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 7 non-mask sub */
- shader[i++] = CF_DWORD0(ADDR(18));
+ /* 7 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(1),
+ I_COUNT(0),
CALL_COUNT(0),
END_OF_PROGRAM(0),
VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_TEX),
+ CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
+
+ /* subroutine read-constant-src*/
/* 8 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(0),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- R6xx_ELEM_LOOP(0),
- BURST_COUNT(1),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
+ /* read constants into GPR0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(18),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
/* 9 */
+ /* return */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -2455,8 +2463,67 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 10 - alu 0 */
- /* MUL gpr[2].x gpr[1].x gpr[0].x */
+ /* subroutine fetch mask */
+ /* 10 */
+ /* fetch mask into GPR1*/
+ shader[i++] = CF_DWORD0(ADDR(28));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 11 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* subroutine read-constant-mask*/
+ /* 12 */
+ /* read constants into GPR1 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(22),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 13 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* ALU clauses */
+
+ /* 14 - alu 0 */
+ /* MUL gpr[0].x gpr[1].x gpr[0].x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -2478,12 +2545,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_MUL),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 11 - alu 1 */
- /* MUL gpr[2].y gpr[1].y gpr[0].y */
+ /* 15 - alu 1 */
+ /* MUL gpr[0].y gpr[1].y gpr[0].y */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -2505,12 +2572,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_MUL),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 12 - alu 2 */
- /* MUL gpr[2].z gpr[1].z gpr[0].z */
+ /* 16 - alu 2 */
+ /* MUL gpr[0].z gpr[1].z gpr[0].z */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
@@ -2532,12 +2599,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_MUL),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 13 - alu 3 */
- /* MUL gpr[2].w gpr[1].w gpr[0].w */
+ /* 17 - alu 3 */
+ /* MUL gpr[0].w gpr[1].w gpr[0].w */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
@@ -2559,12 +2626,222 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_MUL),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ /* 18 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 19 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 20 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 21 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- /* 14/15 - src - mask */
+ /* 22 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 23 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 24 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 25 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ /* 26/27 - src */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -2592,7 +2869,7 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 16/17 - mask */
+ /* 28/29 - mask */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -2621,34 +2898,5 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 18/19 - src - non-mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- BC_FRAC_MODE(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- R7xx_ALT_CONST(0));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
-
return i;
}