summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vadim Girlin <vadimgirlin@gmail.com> 2013-08-02 06:38:23 +0400
committer Vadim Girlin <vadimgirlin@gmail.com> 2013-08-02 06:38:23 +0400
commit 545b6f52556981d02b20af0fa70699d59d033d8b (patch)
tree 14ea6f0fd6ce56f8bb173df8780f6d802ded56f2
parent 573bda2468d0e74677913501d85cdabc1a407fc8 (diff)
r600g: initial support for geometry shaders on evergreen [r600-geom-shaders]
So far this supports only GPUs with a single shader engine (e.g. Juniper). It passes the geometry-basic piglit test. Use MESA_GL_VERSION_OVERRIDE=3.2 MESA_GLSL_VERSION_OVERRIDE=150 to test it. Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
-rw-r--r-- src/gallium/drivers/r600/eg_asm.c 9
-rw-r--r-- src/gallium/drivers/r600/evergreen_state.c 154
-rw-r--r-- src/gallium/drivers/r600/r600_asm.c 2
-rw-r--r-- src/gallium/drivers/r600/r600_blit.c 1
-rw-r--r-- src/gallium/drivers/r600/r600_hw_context.c 6
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.c 9
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h 28
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 713
-rw-r--r-- src/gallium/drivers/r600/r600_shader.h 12
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c 160
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc.h 2
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_dump.cpp 2
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 2
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_parser.cpp 11
-rw-r--r-- src/gallium/drivers/r600/sb/sb_shader.cpp 1
15 files changed, 907 insertions, 205 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 42e78c0f06..a20171e65d 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -92,8 +92,8 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
id++;
- } else if (cfop->flags & CF_STRM) {
- /* MEM_STREAM instructions */
+ } else if (cfop->flags & CF_MEM) {
+ /* MEM_STREAM, MEM_RING instructions */
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
@@ -107,12 +107,13 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
id++;
} else {
- /* branch, loop, call, return instructions */
+ /* other instructions */
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode)|
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COND(cf->cond) |
- S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+ S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+ S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
}
return 0;
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 926ff35aca..99e774d9ad 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2494,6 +2494,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
struct r600_resource *rbuffer;
uint64_t va;
unsigned buffer_index = ffs(dirty_mask) - 1;
+ unsigned gs_ring_buffer = (buffer_index == R600_GS_RING_CONST_BUFFER);
cb = &state->cb[buffer_index];
rbuffer = (struct r600_resource*)cb->buffer;
@@ -2502,10 +2503,12 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b);
va += cb->buffer_offset;
- r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
- ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
- r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
- pkt_flags);
+ if (!gs_ring_buffer) {
+ r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
+ ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
+ r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
+ pkt_flags);
+ }
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
@@ -2515,10 +2518,12 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
r600_write_value(cs, va); /* RESOURCEi_WORD0 */
r600_write_value(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
r600_write_value(cs, /* RESOURCEi_WORD2 */
- S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
- S_030008_STRIDE(16) |
- S_030008_BASE_ADDRESS_HI(va >> 32UL));
+ S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
+ S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
+ S_030008_BASE_ADDRESS_HI(va >> 32UL) |
+ S_030008_DATA_FORMAT(FMT_32_32_32_32_FLOAT));
r600_write_value(cs, /* RESOURCEi_WORD3 */
+ S_03000C_UNCACHED(gs_ring_buffer ? 1 : 0) |
S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
@@ -2526,7 +2531,8 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
r600_write_value(cs, 0); /* RESOURCEi_WORD6 */
- r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
+ r600_write_value(cs, /* RESOURCEi_WORD7 */
+ S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ));
@@ -2690,6 +2696,63 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, shader->buffer, RADEON_USAGE_READ));
}
+static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
+{
+ struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
+ struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
+
+ uint32_t v = 0, v2 = 0;
+
+ if (state->geom_enable) {
+ v = S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
+ S_028B54_GS_EN(1) |
+ S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+
+ v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
+ S_028A40_CUT_MODE(V_028A40_GS_CUT_128);
+ }
+
+ r600_write_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
+ r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
+}
+
+static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
+{
+ struct pipe_screen *screen = rctx->context.screen;
+ struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
+ struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
+ struct r600_resource *rbuffer;
+
+ r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+ r600_write_value(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ r600_write_value(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+
+ if (state->enable) {
+ rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
+ r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
+ (r600_resource_va(screen, &rbuffer->b.b)) >> 8);
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READWRITE));
+ r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
+ state->esgs_ring.buffer_size >> 8);
+
+ rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
+ r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
+ (r600_resource_va(screen, &rbuffer->b.b)) >> 8);
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READWRITE));
+ r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
+ state->gsvs_ring.buffer_size >> 8);
+ } else {
+ r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
+ r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
+ }
+
+ r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+ r600_write_value(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ r600_write_value(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+}
+
void cayman_init_common_regs(struct r600_command_buffer *cb,
enum chip_class ctx_chip_class,
enum radeon_family ctx_family,
@@ -3485,6 +3548,77 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
shader->flatshade = rctx->rasterizer->flatshade;
}
+void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+ struct r600_command_buffer *cb = &shader->command_buffer;
+ struct r600_shader *rshader = &shader->shader;
+
+ r600_init_command_buffer(cb, 32);
+
+ r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
+ S_028890_NUM_GPRS(rshader->bc.ngpr) |
+ S_028890_STACK_SIZE(rshader->bc.nstack));
+ r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
+ r600_resource_va(ctx->screen, (void *)shader->bo) >> 8);
+ /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
+}
+
+void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+ struct r600_command_buffer *cb = &shader->command_buffer;
+ struct r600_shader *rshader = &shader->shader;
+ struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
+ unsigned gsvs_itemsize =
+ (cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2;
+
+ r600_init_command_buffer(cb, 64);
+
+ /* VGT_GS_OUT_PRIM_TYPE is written by r600_draw_vbo */
+ /* VGT_GS_MODE is written by evergreen_emit_shader_stages */
+
+ r600_store_context_reg(cb, R_028AB8_VGT_VTX_CNT_EN, 1);
+
+ r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT,
+ S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices));
+
+
+/* XXX kernel checker fails
+ r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(0) |
+ S_028B90_ENABLE(0));
+*/
+ r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
+ r600_store_value(cb, cp_shader->ring_item_size >> 2);
+ r600_store_value(cb, 0);
+ r600_store_value(cb, 0);
+ r600_store_value(cb, 0);
+
+ r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
+ (rshader->ring_item_size) >> 2);
+
+ r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
+ gsvs_itemsize);
+
+ r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
+ r600_store_value(cb, gsvs_itemsize);
+ r600_store_value(cb, gsvs_itemsize);
+ r600_store_value(cb, gsvs_itemsize);
+
+ /* FIXME calculate these values somehow ??? */
+ r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
+ r600_store_value(cb, 0x80); /* GS_PER_ES */
+ r600_store_value(cb, 0x100); /* ES_PER_GS */
+ r600_store_value(cb, 0x2); /* GS_PER_VS */
+
+ r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
+ S_028878_NUM_GPRS(rshader->bc.ngpr) |
+ S_028878_STACK_SIZE(rshader->bc.nstack));
+ r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
+ r600_resource_va(ctx->screen, (void *)shader->bo) >> 8);
+ /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
+}
+
+
void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_command_buffer *cb = &shader->command_buffer;
@@ -3864,6 +3998,10 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->streamout.begin_atom, id++, r600_emit_streamout_begin, 0);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
+ r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
+ r600_init_atom(rctx, &rctx->export_shader.atom, id++, r600_emit_shader, 0);
+ r600_init_atom(rctx, &rctx->shader_stages.atom, id++, evergreen_emit_shader_stages, 6);
+ r600_init_atom(rctx, &rctx->gs_rings.atom, id++, evergreen_emit_gs_rings, 26);
rctx->context.create_blend_state = evergreen_create_blend_state;
rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 5255b8839e..76f2c10a49 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1936,7 +1936,7 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
- } else if (r600_isa_cf(cf->op)->flags & CF_STRM) {
+ } else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
int o = 0;
const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
"WRITE_IND_ACK"};
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 2230e7bbde..0ba519017a 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -58,6 +58,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb);
util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso);
util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
+ util_blitter_save_geometry_shader(rctx->blitter, rctx->gs_shader);
util_blitter_save_so_targets(rctx->blitter, rctx->streamout.num_targets,
(struct pipe_stream_output_target**)rctx->streamout.targets);
util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 97b0f9cb0e..23b7d00a6e 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -379,6 +379,12 @@ void r600_begin_new_cs(struct r600_context *ctx)
ctx->config_state.atom.dirty = true;
ctx->stencil_ref.atom.dirty = true;
ctx->vertex_fetch_shader.atom.dirty = true;
+ ctx->export_shader.atom.dirty = true;
+ if (ctx->gs_shader) {
+ ctx->geometry_shader.atom.dirty = true;
+ ctx->shader_stages.atom.dirty = true;
+ ctx->gs_rings.atom.dirty = true;
+ }
ctx->vertex_shader.atom.dirty = true;
ctx->viewport.atom.dirty = true;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 74dd9606a3..c32bbba24e 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -715,15 +715,18 @@ static float r600_get_paramf(struct pipe_screen* pscreen,
static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
{
+ struct r600_screen *rscreen = (struct r600_screen *)pscreen;
+
switch(shader)
{
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_VERTEX:
- case PIPE_SHADER_COMPUTE:
+ case PIPE_SHADER_COMPUTE:
break;
case PIPE_SHADER_GEOMETRY:
- /* XXX: support and enable geometry programs */
- return 0;
+ if (rscreen->chip_class < EVERGREEN)
+ return 0;
+ break;
default:
/* XXX: support tessellation on Evergreen */
return 0;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 349a6cb6cf..542b1ccd27 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -35,21 +35,22 @@
#include "r600_public.h"
#include "r600_resource.h"
-#define R600_NUM_ATOMS 41
+#define R600_NUM_ATOMS 42
/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 16
#define R600_MAX_DRAW_CS_DWORDS 34
#define R600_TRACE_CS_DWORDS 7
-#define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 3
+#define R600_MAX_USER_CONST_BUFFERS 12
+#define R600_MAX_DRIVER_CONST_BUFFERS 4
#define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
/* start driver buffers after user buffers */
#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3)
#define R600_MAX_CONST_BUFFER_SIZE 4096
@@ -212,6 +213,18 @@ struct r600_viewport_state {
struct pipe_viewport_state state;
};
+struct r600_shader_stages_state {
+ struct r600_atom atom;
+ unsigned geom_enable;
+};
+
+struct r600_gs_rings_state {
+ struct r600_atom atom;
+ unsigned enable;
+ struct pipe_constant_buffer esgs_ring;
+ struct pipe_constant_buffer gsvs_ring;
+};
+
struct r600_pipe_fences {
struct r600_resource *bo;
unsigned *data;
@@ -458,7 +471,7 @@ struct r600_fetch_shader {
struct r600_shader_state {
struct r600_atom atom;
- struct r600_pipe_shader_selector *shader;
+ struct r600_pipe_shader *shader;
};
struct r600_query_buffer {
@@ -595,7 +608,11 @@ struct r600_context {
struct r600_cso_state vertex_fetch_shader;
struct r600_shader_state vertex_shader;
struct r600_shader_state pixel_shader;
+ struct r600_shader_state geometry_shader;
+ struct r600_shader_state export_shader;
struct r600_cs_shader_state cs_shader_state;
+ struct r600_shader_stages_state shader_stages;
+ struct r600_gs_rings_state gs_rings;
struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES];
struct r600_textures_info samplers[PIPE_SHADER_TYPES];
/** Vertex buffers for fetch shaders */
@@ -609,6 +626,7 @@ struct r600_context {
unsigned compute_cb_target_mask;
struct r600_pipe_shader_selector *ps_shader;
struct r600_pipe_shader_selector *vs_shader;
+ struct r600_pipe_shader_selector *gs_shader;
struct r600_rasterizer_state *rasterizer;
bool alpha_to_one;
bool force_blend_disable;
@@ -702,6 +720,8 @@ void cayman_init_common_regs(struct r600_command_buffer *cb,
void evergreen_init_state_functions(struct r600_context *rctx);
void evergreen_init_atom_start_cs(struct r600_context *rctx);
void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
void *evergreen_create_resolve_blend(struct r600_context *rctx);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 11b5bedfc5..8ea31bc42f 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -60,7 +60,7 @@ issued in the w slot as well.
The compiler must issue the source argument to slots z, y, and x
*/
-static int r600_shader_from_tgsi(struct r600_screen *rscreen,
+static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
struct r600_shader_key key);
@@ -131,17 +131,43 @@ static void r600_dump_streamout(struct pipe_stream_output_info *so)
}
}
+static int store_shader(struct pipe_context *ctx,
+ struct r600_pipe_shader *shader)
+{
+ struct r600_context *rctx = (struct r600_context *)ctx;
+ uint32_t *ptr, i;
+
+ if (shader->bo == NULL) {
+ shader->bo = (struct r600_resource*)
+ pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
+ if (shader->bo == NULL) {
+ return -ENOMEM;
+ }
+ ptr = r600_buffer_mmap_sync_with_rings(rctx, shader->bo, PIPE_TRANSFER_WRITE);
+ if (R600_BIG_ENDIAN) {
+ for (i = 0; i < shader->shader.bc.ndw; ++i) {
+ ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]);
+ }
+ } else {
+ memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
+ }
+ rctx->ws->buffer_unmap(shader->bo->cs_buf);
+ }
+
+ return 0;
+}
+
int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
struct r600_shader_key key)
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_shader_selector *sel = shader->selector;
- int r, i;
- uint32_t *ptr;
+ int r;
bool dump = r600_can_dump_shader(rctx->screen, tgsi_get_processor_type(sel->tokens));
unsigned use_sb = rctx->screen->debug_flags & DBG_SB;
unsigned sb_disasm = use_sb || (rctx->screen->debug_flags & DBG_SB_DISASM);
+ unsigned export_shader = key.vs_as_es;
shader->shader.bc.isa = rctx->isa;
@@ -153,7 +179,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
r600_dump_streamout(&sel->so);
}
}
- r = r600_shader_from_tgsi(rctx->screen, shader, key);
+ r = r600_shader_from_tgsi(rctx, shader, key);
if (r) {
R600_ERR("translation from TGSI failed !\n");
return r;
@@ -184,29 +210,39 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
}
}
- /* Store the shader in a buffer. */
- if (shader->bo == NULL) {
- shader->bo = (struct r600_resource*)
- pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
- if (shader->bo == NULL) {
- return -ENOMEM;
- }
- ptr = r600_buffer_mmap_sync_with_rings(rctx, shader->bo, PIPE_TRANSFER_WRITE);
- if (R600_BIG_ENDIAN) {
- for (i = 0; i < shader->shader.bc.ndw; ++i) {
- ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]);
- }
- } else {
- memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
+ if (shader->gs_copy_shader) {
+ if (dump) {
+ // dump copy shader
+ r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
+ &shader->gs_copy_shader->shader, dump, 0);
+ if (r)
+ return r;
}
- rctx->ws->buffer_unmap(shader->bo->cs_buf);
+
+ if ((r = store_shader(ctx, shader->gs_copy_shader)))
+ return r;
}
+ /* Store the shader in a buffer. */
+ if ((r = store_shader(ctx, shader)))
+ return r;
+
/* Build state. */
switch (shader->shader.processor_type) {
+ case TGSI_PROCESSOR_GEOMETRY:
+ if (rctx->chip_class >= EVERGREEN) {
+ evergreen_update_gs_state(ctx, shader);
+ evergreen_update_vs_state(ctx, shader->gs_copy_shader);
+ } else {
+ assert(!"not suported yet");
+ }
+ break;
case TGSI_PROCESSOR_VERTEX:
if (rctx->chip_class >= EVERGREEN) {
- evergreen_update_vs_state(ctx, shader);
+ if (export_shader)
+ evergreen_update_es_state(ctx, shader);
+ else
+ evergreen_update_vs_state(ctx, shader);
} else {
r600_update_vs_state(ctx, shader);
}
@@ -272,6 +308,9 @@ struct r600_shader_ctx {
unsigned cv_output;
int fragcoord_input;
int native_integers;
+ int next_ring_offset;
+ int gs_next_vertex;
+ struct r600_shader *gs_for_vs;
};
struct r600_shader_tgsi_instruction {
@@ -281,6 +320,7 @@ struct r600_shader_tgsi_instruction {
int (*process)(struct r600_shader_ctx *ctx);
};
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx);
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
@@ -312,7 +352,13 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
#endif
for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
if (i->Src[j].Register.Dimension) {
- if (i->Src[j].Register.File != TGSI_FILE_CONSTANT) {
+ switch (i->Src[j].Register.File) {
+ case TGSI_FILE_CONSTANT:
+ break;
+ case TGSI_FILE_INPUT:
+ if (ctx->type == TGSI_PROCESSOR_GEOMETRY)
+ break;
+ default:
R600_ERR("unsupported src %d (dimension %d)\n", j,
i->Src[j].Register.Dimension);
return -EINVAL;
@@ -563,6 +609,10 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
if ((r = evergreen_interp_input(ctx, i)))
return r;
}
+ } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ /* FIXME probably skip inputs if they aren't passed in the ring */
+ ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
+ ctx->next_ring_offset += 16;
}
for (j = 1; j < count; ++j) {
ctx->shader->input[i + j] = ctx->shader->input[i];
@@ -577,7 +627,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
ctx->shader->output[i].interpolate = d->Interp.Interpolate;
ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+ if (ctx->type == TGSI_PROCESSOR_VERTEX ||
+ ctx->type == TGSI_PROCESSOR_GEOMETRY) {
ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
switch (d->Semantic.Name) {
case TGSI_SEMANTIC_CLIPDIST:
@@ -800,6 +851,59 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int cb_idx
return 0;
}
+static int fetch_gs_input(struct r600_shader_ctx *ctx, unsigned index, unsigned vtx_id, unsigned int dst_reg)
+{
+ struct r600_bytecode_vtx vtx;
+ int r;
+ int offset_reg = vtx_id / 3;
+ int offset_chan = vtx_id % 3;
+
+ /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
+ * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
+
+ if (offset_reg == 0 && offset_chan == 2)
+ offset_chan = 3;
+
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
+ vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.src_gpr = offset_reg;
+ vtx.src_sel_x = offset_chan;
+ vtx.offset = index * 16; /*bytes*/
+ vtx.mega_fetch_count = 16;
+ vtx.dst_gpr = dst_reg;
+ vtx.dst_sel_x = 0; /* SEL_X */
+ vtx.dst_sel_y = 1; /* SEL_Y */
+ vtx.dst_sel_z = 2; /* SEL_Z */
+ vtx.dst_sel_w = 3; /* SEL_W */
+ vtx.use_const_fields = 1;
+
+ if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
+ return r;
+
+ return 0;
+}
+
+static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int i;
+
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ struct tgsi_full_src_register *src = &inst->Src[i];
+
+ if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) {
+ int treg = r600_get_temp(ctx);
+ int index = src->Register.Index;
+ int vtx_id = src->Dimension.Index;
+
+ fetch_gs_input(ctx, index, vtx_id, treg);
+ ctx->src[i].sel = treg;
+ }
+ }
+ return 0;
+}
+
static int tgsi_split_constant(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -902,11 +1006,248 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
return 0;
}
+static int generate_gs_copy_shader(struct r600_context *rctx,
+ struct r600_pipe_shader *gs)
+{
+ struct r600_shader_ctx ctx = {};
+ struct r600_shader *gs_shader = &gs->shader;
+ struct r600_pipe_shader *cshader;
+ int ocnt = gs_shader->noutput;
+ struct r600_bytecode_alu alu;
+ struct r600_bytecode_vtx vtx;
+ struct r600_bytecode_output output;
+ struct r600_bytecode_cf *cf_jump, *cf_pop,
+ *last_exp_pos = NULL, *last_exp_param = NULL;
+ int i, next_pos = 60, next_param = 0;
+
+ cshader = calloc(1, sizeof(struct r600_pipe_shader));
+ if (!cshader)
+ return 0;
+
+ memcpy(cshader->shader.output, gs_shader->output, ocnt *
+ sizeof(struct r600_shader_io));
+
+ cshader->shader.noutput = ocnt;
+
+ ctx.shader = &cshader->shader;
+ ctx.bc = &ctx.shader->bc;
+ ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX;
+
+ r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family,
+ rctx->screen->has_compressed_msaa_texturing);
+
+ ctx.bc->isa = rctx->isa;
+
+ /* R0.x = R0.x & 0x3fffffff */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP2_AND_INT;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 0x3fffffff;
+ alu.dst.write = 1;
+ r600_bytecode_add_alu(ctx.bc, &alu);
+
+ /* R0.y = R0.x >> 30 */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP2_LSHR_INT;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 0x1e;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r600_bytecode_add_alu(ctx.bc, &alu);
+
+ /* PRED_SETE_INT __, R0.y, 0 */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP2_PRED_SETE_INT;
+ alu.src[0].chan = 1;
+ alu.src[1].sel = V_SQ_ALU_SRC_0;
+ alu.execute_mask = 1;
+ alu.update_pred = 1;
+ alu.last = 1;
+ r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
+ cf_jump = ctx.bc->cf_last;
+
+ /* fetch vertex data from GSVS ring */
+ for (i = 0; i < ocnt; ++i) {
+ struct r600_shader_io *out = &ctx.shader->output[i];
+ out->gpr = i + 1;
+ out->ring_offset = i * 16;
+
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.op = FETCH_OP_VFETCH;
+ vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
+ vtx.fetch_type = 2;
+ vtx.offset = out->ring_offset;
+ vtx.dst_gpr = out->gpr;
+ vtx.dst_sel_x = 0;
+ vtx.dst_sel_y = 1;
+ vtx.dst_sel_z = 2;
+ vtx.dst_sel_w = 3;
+ vtx.use_const_fields = 1;
+
+ r600_bytecode_add_vtx(ctx.bc, &vtx);
+ }
-static int r600_shader_from_tgsi(struct r600_screen *rscreen,
+ /* XXX handle clipvertex, streamout? */
+
+ /* export vertex data */
+ /* XXX factor out common code with r600_shader_from_tgsi ? */
+ for (i = 0; i < ocnt; ++i) {
+ struct r600_shader_io *out = &ctx.shader->output[i];
+
+ if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
+ continue;
+
+ memset(&output, 0, sizeof(output));
+ output.gpr = out->gpr;
+ output.elem_size = 3;
+ output.swizzle_x = 0;
+ output.swizzle_y = 1;
+ output.swizzle_z = 2;
+ output.swizzle_w = 3;
+ output.burst_count = 1;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output.op = CF_OP_EXPORT;
+ switch (out->name) {
+ case TGSI_SEMANTIC_POSITION:
+ output.array_base = next_pos++;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ break;
+
+ case TGSI_SEMANTIC_PSIZE:
+ output.array_base = next_pos++;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ break;
+ case TGSI_SEMANTIC_CLIPDIST:
+ /* spi_sid is 0 for clipdistance outputs that were generated
+ * for clipvertex - we don't need to pass them to PS */
+ if (out->spi_sid) {
+ /* duplicate it as PARAM to pass to the pixel shader */
+ output.array_base = next_param++;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_param = ctx.bc->cf_last;
+ }
+ output.array_base = next_pos++;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ output.swizzle_y = 4; /* 0 */
+ output.swizzle_z = 4; /* 0 */
+ output.swizzle_w = 5; /* 1 */
+ break;
+ }
+ r600_bytecode_add_output(ctx.bc, &output);
+ if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
+ last_exp_param = ctx.bc->cf_last;
+ else
+ last_exp_pos = ctx.bc->cf_last;
+ }
+
+ if (!last_exp_pos) {
+ memset(&output, 0, sizeof(output));
+ output.gpr = 0;
+ output.elem_size = 3;
+ output.swizzle_x = 7;
+ output.swizzle_y = 7;
+ output.swizzle_z = 7;
+ output.swizzle_w = 7;
+ output.burst_count = 1;
+ output.type = 2;
+ output.op = CF_OP_EXPORT;
+ output.array_base = next_pos++;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_pos = ctx.bc->cf_last;
+ }
+
+ if (!last_exp_param) {
+ memset(&output, 0, sizeof(output));
+ output.gpr = 0;
+ output.elem_size = 3;
+ output.swizzle_x = 7;
+ output.swizzle_y = 7;
+ output.swizzle_z = 7;
+ output.swizzle_w = 7;
+ output.burst_count = 1;
+ output.type = 2;
+ output.op = CF_OP_EXPORT;
+ output.array_base = next_param++;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_param = ctx.bc->cf_last;
+ }
+
+ last_exp_pos->op = CF_OP_EXPORT_DONE;
+ last_exp_param->op = CF_OP_EXPORT_DONE;
+
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
+ cf_pop = ctx.bc->cf_last;
+
+ cf_jump->cf_addr = cf_pop->id + 2;
+ cf_jump->pop_count = 1;
+ cf_pop->cf_addr = cf_pop->id + 2;
+ cf_pop->pop_count = 1;
+
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+ ctx.bc->cf_last->end_of_program = 1;
+
+ gs->gs_copy_shader = cshader;
+
+ ctx.bc->nstack = 1;
+ cshader->shader.ring_item_size = ocnt * 16;
+
+ return r600_bytecode_build(ctx.bc);
+}
+
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx)
+{
+ struct r600_bytecode_output output;
+ int i, k, ring_offset;
+
+ for (i = 0; i < ctx->shader->noutput; i++) {
+ if (ctx->gs_for_vs) {
+ /* for ES we need to lookup corresponding ring offset expected by GS
+ * (map this output to GS input by name and sid) */
+ /* FIXME precompute offsets */
+ ring_offset = -1;
+ for(k = 0; k < ctx->gs_for_vs->ninput; ++k) {
+ struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
+ struct r600_shader_io *out = &ctx->shader->output[i];
+ if (in->name == out->name && in->sid == out->sid)
+ ring_offset = in->ring_offset;
+ }
+ if (ring_offset == -1) {
+ R600_ERR("error mapping VS->GS outputs\n");
+ return -1;
+ }
+ } else
+ ring_offset = i * 16;
+
+ /* next_ring_offset after parsing input decls contains total size of
+ * single vertex data, gs_next_vertex - current vertex index */
+ ring_offset += ctx->next_ring_offset * ctx->gs_next_vertex;
+
+ memset(&output, 0, sizeof(struct r600_bytecode_output));
+ output.gpr = ctx->shader->output[i].gpr;
+ output.elem_size = 3;
+ output.comp_mask = 0xF;
+ output.burst_count = 1;
+ output.op = CF_OP_MEM_RING;
+ output.array_base = ring_offset >> 2; /* in dwords */
+ r600_bytecode_add_output(ctx->bc, &output);
+ }
+ ++ctx->gs_next_vertex;
+ return 0;
+}
+
+
+static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
struct r600_shader_key key)
{
+ struct r600_screen *rscreen = rctx->screen;
struct r600_shader *shader = &pipeshader->shader;
struct tgsi_token *tokens = pipeshader->selector->tokens;
struct pipe_stream_output_info so = pipeshader->selector->so;
@@ -921,6 +1262,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
/* Declarations used by llvm code */
bool use_llvm = false;
bool indirect_gprs;
+ bool ring_outputs = false;
#ifdef R600_USE_LLVM
use_llvm = !(rscreen->debug_flags & DBG_NO_LLVM);
@@ -929,6 +1271,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
ctx.shader = shader;
ctx.native_integers = true;
+ shader->vs_as_es = key.vs_as_es;
+
r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
rscreen->has_compressed_msaa_texturing);
ctx.tokens = tokens;
@@ -940,6 +1284,17 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
+ ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+
+ if (key.vs_as_es) {
+ ctx.gs_for_vs = &rctx->gs_shader->current->shader;
+ } else {
+ ctx.gs_for_vs = NULL;
+ }
+
+ ctx.next_ring_offset = 0;
+ ctx.gs_next_vertex = 0;
+
ctx.face_gpr = -1;
ctx.fragcoord_input = -1;
ctx.colors_used = 0;
@@ -992,6 +1347,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
}
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY && ctx.bc->chip_class >= EVERGREEN) {
+ /* FIXME 1 would be enough in some cases (3 or less input vertices) */
+ ctx.file_offset[TGSI_FILE_INPUT] = 2;
+ }
ctx.use_llvm = use_llvm;
if (use_llvm) {
@@ -1068,6 +1427,15 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
/* we don't need this one */
break;
+ case TGSI_PROPERTY_GS_INPUT_PRIM:
+ shader->gs_input_prim = property->u[0].Data;
+ break;
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ shader->gs_output_prim = property->u[0].Data;
+ break;
+ case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+ shader->gs_max_out_vertices = property->u[0].Data;
+ break;
}
break;
default:
@@ -1077,6 +1445,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
}
}
+ shader->ring_item_size = ctx.next_ring_offset;
+
/* Process two side if needed */
if (shader->two_side && ctx.colors_used) {
int i, count = ctx.shader->ninput;
@@ -1213,6 +1583,9 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
goto out_err;
if ((r = tgsi_split_literal_constant(&ctx)))
goto out_err;
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY)
+ if ((r = tgsi_split_gs_inputs(&ctx)))
+ goto out_err;
if (ctx.bc->chip_class == CAYMAN)
ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
else if (ctx.bc->chip_class >= EVERGREEN)
@@ -1234,7 +1607,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
noutput = shader->noutput;
- if (ctx.clip_vertex_write) {
+ if (!ring_outputs && ctx.clip_vertex_write) {
unsigned clipdist_temp[2];
clipdist_temp[0] = r600_get_temp(&ctx);
@@ -1285,7 +1658,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
}
/* Add stream outputs. */
- if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm) {
+ if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
+ so.num_outputs && !use_llvm) {
unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
/* Sanity checking. */
@@ -1388,114 +1762,118 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
}
}
- /* export output */
- for (i = 0, j = 0; i < noutput; i++, j++) {
- memset(&output[j], 0, sizeof(struct r600_bytecode_output));
- output[j].gpr = shader->output[i].gpr;
- output[j].elem_size = 3;
- output[j].swizzle_x = 0;
- output[j].swizzle_y = 1;
- output[j].swizzle_z = 2;
- output[j].swizzle_w = 3;
- output[j].burst_count = 1;
- output[j].type = -1;
- output[j].op = CF_OP_EXPORT;
- switch (ctx.type) {
- case TGSI_PROCESSOR_VERTEX:
- switch (shader->output[i].name) {
- case TGSI_SEMANTIC_POSITION:
- output[j].array_base = next_pos_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
- break;
+ if (ring_outputs) {
+ if (key.vs_as_es)
+ emit_gs_ring_writes(&ctx);
+ } else {
+ /* export output */
+ for (i = 0, j = 0; i < noutput; i++, j++) {
+ memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+ output[j].gpr = shader->output[i].gpr;
+ output[j].elem_size = 3;
+ output[j].swizzle_x = 0;
+ output[j].swizzle_y = 1;
+ output[j].swizzle_z = 2;
+ output[j].swizzle_w = 3;
+ output[j].burst_count = 1;
+ output[j].type = -1;
+ output[j].op = CF_OP_EXPORT;
+ switch (ctx.type) {
+ case TGSI_PROCESSOR_VERTEX:
+ switch (shader->output[i].name) {
+ case TGSI_SEMANTIC_POSITION:
+ output[j].array_base = next_pos_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ break;
- case TGSI_SEMANTIC_PSIZE:
- output[j].array_base = next_pos_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
- break;
- case TGSI_SEMANTIC_CLIPVERTEX:
- j--;
- break;
- case TGSI_SEMANTIC_CLIPDIST:
- output[j].array_base = next_pos_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
- /* spi_sid is 0 for clipdistance outputs that were generated
- * for clipvertex - we don't need to pass them to PS */
- if (shader->output[i].spi_sid) {
- j++;
- /* duplicate it as PARAM to pass to the pixel shader */
- memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
- output[j].array_base = next_param_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
- }
- break;
- case TGSI_SEMANTIC_FOG:
- output[j].swizzle_y = 4; /* 0 */
- output[j].swizzle_z = 4; /* 0 */
- output[j].swizzle_w = 5; /* 1 */
- break;
- }
- break;
- case TGSI_PROCESSOR_FRAGMENT:
- if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
- /* never export more colors than the number of CBs */
- if (next_pixel_base && next_pixel_base >= key.nr_cbufs) {
- /* skip export */
+ case TGSI_SEMANTIC_PSIZE:
+ output[j].array_base = next_pos_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
j--;
- continue;
- }
- output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
- output[j].array_base = next_pixel_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
- shader->nr_ps_color_exports++;
- if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
- for (k = 1; k < key.nr_cbufs; k++) {
+ break;
+ case TGSI_SEMANTIC_CLIPDIST:
+ output[j].array_base = next_pos_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ /* spi_sid is 0 for clipdistance outputs that were generated
+ * for clipvertex - we don't need to pass them to PS */
+ if (shader->output[i].spi_sid) {
j++;
- memset(&output[j], 0, sizeof(struct r600_bytecode_output));
- output[j].gpr = shader->output[i].gpr;
- output[j].elem_size = 3;
- output[j].swizzle_x = 0;
- output[j].swizzle_y = 1;
- output[j].swizzle_z = 2;
+ /* duplicate it as PARAM to pass to the pixel shader */
+ memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
+ output[j].array_base = next_param_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ }
+ break;
+ case TGSI_SEMANTIC_FOG:
+ output[j].swizzle_y = 4; /* 0 */
+ output[j].swizzle_z = 4; /* 0 */
+ output[j].swizzle_w = 5; /* 1 */
+ break;
+ }
+ break;
+ case TGSI_PROCESSOR_FRAGMENT:
+ if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
+ /* never export more colors than the number of CBs */
+ if (next_pixel_base && next_pixel_base >= key.nr_cbufs) {
+ /* skip export */
+ j--;
+ continue;
+ }
output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
- output[j].burst_count = 1;
output[j].array_base = next_pixel_base++;
- output[j].op = CF_OP_EXPORT;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
+ if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
+ for (k = 1; k < key.nr_cbufs; k++) {
+ j++;
+ memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+ output[j].gpr = shader->output[i].gpr;
+ output[j].elem_size = 3;
+ output[j].swizzle_x = 0;
+ output[j].swizzle_y = 1;
+ output[j].swizzle_z = 2;
+ output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+ output[j].burst_count = 1;
+ output[j].array_base = next_pixel_base++;
+ output[j].op = CF_OP_EXPORT;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ shader->nr_ps_color_exports++;
+ }
+ }
+ } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
+ output[j].array_base = 61;
+ output[j].swizzle_x = 2;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
+ output[j].array_base = 61;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 1;
+ output[j].swizzle_z = output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ } else {
+ R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
+ r = -EINVAL;
+ goto out_err;
}
- }
- } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
- output[j].array_base = 61;
- output[j].swizzle_x = 2;
- output[j].swizzle_y = 7;
- output[j].swizzle_z = output[j].swizzle_w = 7;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
- } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
- output[j].array_base = 61;
- output[j].swizzle_x = 7;
- output[j].swizzle_y = 1;
- output[j].swizzle_z = output[j].swizzle_w = 7;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
- } else {
- R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
- r = -EINVAL;
- goto out_err;
+ break;
+ default:
+ R600_ERR("unsupported processor type %d\n", ctx.type);
+ r = -EINVAL;
+ goto out_err;
}
- break;
- default:
- R600_ERR("unsupported processor type %d\n", ctx.type);
- r = -EINVAL;
- goto out_err;
- }
- if (output[j].type==-1) {
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
- output[j].array_base = next_param_base++;
+ if (output[j].type==-1) {
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[j].array_base = next_param_base++;
+ }
}
- }
- /* add fake position export */
- if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) {
+ /* add fake position export */
+ if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
@@ -1508,10 +1886,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
output[j].array_base = next_pos_base;
output[j].op = CF_OP_EXPORT;
j++;
- }
+ }
- /* add fake param output for vertex shader if no param is exported */
- if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
+ /* add fake param output for vertex shader if no param is exported */
+ if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
@@ -1524,39 +1902,40 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
j++;
- }
+ }
- /* add fake pixel export */
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
- memset(&output[j], 0, sizeof(struct r600_bytecode_output));
- output[j].gpr = 0;
- output[j].elem_size = 3;
- output[j].swizzle_x = 7;
- output[j].swizzle_y = 7;
- output[j].swizzle_z = 7;
- output[j].swizzle_w = 7;
- output[j].burst_count = 1;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
- output[j].array_base = 0;
- output[j].op = CF_OP_EXPORT;
- j++;
- }
-
- noutput = j;
-
- /* set export done on last export of each type */
- for (i = noutput - 1, output_done = 0; i >= 0; i--) {
- if (!(output_done & (1 << output[i].type))) {
- output_done |= (1 << output[i].type);
- output[i].op = CF_OP_EXPORT_DONE;
+ /* add fake pixel export */
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
+ memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+ output[j].gpr = 0;
+ output[j].elem_size = 3;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = 7;
+ output[j].swizzle_w = 7;
+ output[j].burst_count = 1;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ output[j].array_base = 0;
+ output[j].op = CF_OP_EXPORT;
+ j++;
}
- }
- /* add output to bytecode */
- if (!use_llvm) {
- for (i = 0; i < noutput; i++) {
- r = r600_bytecode_add_output(ctx.bc, &output[i]);
- if (r)
- goto out_err;
+
+ noutput = j;
+
+ /* set export done on last export of each type */
+ for (i = noutput - 1, output_done = 0; i >= 0; i--) {
+ if (!(output_done & (1 << output[i].type))) {
+ output_done |= (1 << output[i].type);
+ output[i].op = CF_OP_EXPORT_DONE;
+ }
+ }
+ /* add output to bytecode */
+ if (!use_llvm) {
+ for (i = 0; i < noutput; i++) {
+ r = r600_bytecode_add_output(ctx.bc, &output[i]);
+ if (r)
+ goto out_err;
+ }
}
}
@@ -1567,7 +1946,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
else {
const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op);
- if (last->flags & CF_CLAUSE)
+ /* alu clause instructions don't have EOP bit, so add NOP */
+ if (last->flags & CF_ALU)
r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
ctx.bc->cf_last->end_of_program = 1;
@@ -1582,6 +1962,11 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
goto out_err;
}
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ if ((r = generate_gs_copy_shader(rctx, pipeshader)))
+ return r;
+ }
+
free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return 0;
@@ -5550,6 +5935,14 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
return 0;
}
+/*
+ * Handler for the TGSI EMIT / ENDPRIM opcodes: adds the CF instruction named
+ * by the opcode table entry. For CF_OP_EMIT_VERTEX the outputs of the
+ * current vertex are written to the ring first.
+ *
+ * Returns the error from emit_gs_ring_writes() if the ring writes fail,
+ * otherwise the result of r600_bytecode_add_cfinst().
+ */
+static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
+{
+	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) {
+		/* don't emit a vertex whose ring writes could not be generated */
+		int r = emit_gs_ring_writes(ctx);
+
+		if (r)
+			return r;
+	}
+
+	return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+}
+
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -5924,8 +6317,8 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
- {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+ {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
@@ -6117,8 +6510,8 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
- {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+ {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index d989ce4364..0bf8b51fff 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -37,6 +37,7 @@ struct r600_shader_io {
unsigned lds_pos; /* for evergreen */
unsigned back_color_input;
unsigned write_mask;
+ int ring_offset;
};
struct r600_shader {
@@ -64,9 +65,17 @@ struct r600_shader {
boolean has_txq_cube_array_z_comp;
boolean uses_tex_buffers;
+ /* geometry shader properties */
+ unsigned gs_input_prim;
+ unsigned gs_output_prim;
+ unsigned gs_max_out_vertices;
+ /* size in bytes of a data item in the ring (single vertex data) */
+ unsigned ring_item_size;
+
unsigned indirect_files;
unsigned max_arrays;
unsigned num_arrays;
+ unsigned vs_as_es;
struct r600_shader_array * arrays;
};
@@ -74,6 +83,7 @@ struct r600_shader_key {
unsigned color_two_side:1;
unsigned alpha_to_one:1;
unsigned nr_cbufs:4;
+ unsigned vs_as_es:1;
};
struct r600_shader_array {
@@ -85,6 +95,8 @@ struct r600_shader_array {
struct r600_pipe_shader {
struct r600_pipe_shader_selector *selector;
struct r600_pipe_shader *next_variant;
+ /* for GS - corresponding copy shader (installed as VS) */
+ struct r600_pipe_shader *gs_copy_shader;
struct r600_shader shader;
struct r600_command_buffer command_buffer; /* register writes */
struct r600_resource *bo;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 173d96e376..78f53eb33b 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -698,6 +698,8 @@ static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_contex
/* Dual-source blending only makes sense with nr_cbufs == 1. */
if (key.nr_cbufs == 1 && rctx->dual_src_blend)
key.nr_cbufs = 2;
+ } else if (sel->type == PIPE_SHADER_VERTEX) {
+ key.vs_as_es = (rctx->gs_shader != NULL);
}
return key;
}
@@ -797,6 +799,12 @@ static void *r600_create_vs_state(struct pipe_context *ctx,
return r600_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
}
+/* create_gs_state hook: forwards to r600_create_shader_state() with
+ * PIPE_SHADER_GEOMETRY as the shader type. */
+static void *r600_create_gs_state(struct pipe_context *ctx,
+				  const struct pipe_shader_state *state)
+{
+	void *gs_sel = r600_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
+
+	return gs_sel;
+}
+
static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -817,6 +825,13 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
rctx->vs_shader = (struct r600_pipe_shader_selector *)state;
}
+/* bind_gs_state hook: remembers `state` as the context's geometry shader
+ * selector. */
+static void r600_bind_gs_state(struct pipe_context *ctx, void *state)
+{
+	struct r600_pipe_shader_selector *gs_sel = state;
+
+	((struct r600_context *)ctx)->gs_shader = gs_sel;
+}
+
static void r600_delete_shader_selector(struct pipe_context *ctx,
struct r600_pipe_shader_selector *sel)
{
@@ -857,6 +872,20 @@ static void r600_delete_vs_state(struct pipe_context *ctx, void *state)
r600_delete_shader_selector(ctx, sel);
}
+
+/* delete_gs_state hook: clears the context's GS binding if it refers to the
+ * selector being deleted, then releases the selector. */
+static void r600_delete_gs_state(struct pipe_context *ctx, void *state)
+{
+	struct r600_pipe_shader_selector *doomed = state;
+	struct r600_context *rctx = (struct r600_context *)ctx;
+
+	if (doomed == rctx->gs_shader)
+		rctx->gs_shader = NULL;
+
+	r600_delete_shader_selector(ctx, doomed);
+}
+
+
void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
{
if (state->dirty_mask) {
@@ -1149,10 +1178,65 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s
pipe_resource_reference(&cb.buffer, NULL);
}
+/*
+ * Point a shader state atom at `shader`.
+ *
+ * With a shader, the atom takes the size of the shader's command buffer, is
+ * marked dirty, and the shader's bo is accounted via
+ * r600_context_add_resource_size(). With NULL, the atom is emptied and marked
+ * clean.
+ */
+static void update_shader_atom(struct pipe_context *ctx,
+			       struct r600_shader_state *state,
+			       struct r600_pipe_shader *shader)
+{
+	state->shader = shader;
+
+	if (!shader) {
+		state->atom.num_dw = 0;
+		state->atom.dirty = false;
+		return;
+	}
+
+	state->atom.num_dw = shader->command_buffer.num_dw;
+	state->atom.dirty = true;
+	r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo);
+}
+
+/*
+ * Bring the geometry-shader related state in line with `enable`.
+ *
+ * Updates shader_stages.geom_enable and gs_rings.enable, dirtying the
+ * respective atoms only when the value changes. When the rings are switched
+ * on and no ESGS buffer exists yet, both ring buffers are created. The rings
+ * are then bound to (or, when disabling, unbound from) the
+ * R600_GS_RING_CONST_BUFFER slot of the geometry and vertex stages.
+ */
+static void update_gs_block_state(struct r600_context *rctx, unsigned enable)
+{
+	if (rctx->shader_stages.geom_enable != enable) {
+		rctx->shader_stages.geom_enable = enable;
+		rctx->shader_stages.atom.dirty = true;
+	}
+
+	/* nothing else to do unless the ring enable state actually flips */
+	if (rctx->gs_rings.enable == enable)
+		return;
+
+	rctx->gs_rings.enable = enable;
+	rctx->gs_rings.atom.dirty = true;
+
+	/* create both rings the first time they are needed */
+	if (enable && !rctx->gs_rings.esgs_ring.buffer) {
+		const unsigned esgs_size = 0x1C000;
+		const unsigned gsvs_size = 0x4000000;
+
+		rctx->gs_rings.esgs_ring.buffer =
+			pipe_buffer_create(rctx->context.screen, PIPE_BIND_CUSTOM,
+					   PIPE_USAGE_STATIC, esgs_size);
+		rctx->gs_rings.esgs_ring.buffer_size = esgs_size;
+
+		rctx->gs_rings.gsvs_ring.buffer =
+			pipe_buffer_create(rctx->context.screen, PIPE_BIND_CUSTOM,
+					   PIPE_USAGE_STATIC, gsvs_size);
+		rctx->gs_rings.gsvs_ring.buffer_size = gsvs_size;
+	}
+
+	/* GS stage gets the ESGS ring, VS stage the GSVS ring; NULL unbinds */
+	r600_set_constant_buffer(&rctx->context, PIPE_SHADER_GEOMETRY,
+				 R600_GS_RING_CONST_BUFFER,
+				 enable ? &rctx->gs_rings.esgs_ring : NULL);
+	r600_set_constant_buffer(&rctx->context, PIPE_SHADER_VERTEX,
+				 R600_GS_RING_CONST_BUFFER,
+				 enable ? &rctx->gs_rings.gsvs_ring : NULL);
+}
+
static bool r600_update_derived_state(struct r600_context *rctx)
{
struct pipe_context * ctx = (struct pipe_context*)rctx;
- bool ps_dirty = false, vs_dirty = false;
+ bool ps_dirty = false, vs_dirty = false, gs_dirty = false;
bool blend_disable;
if (!rctx->blitter->running) {
@@ -1170,22 +1254,54 @@ static bool r600_update_derived_state(struct r600_context *rctx)
}
}
- if (unlikely(rctx->vertex_shader.shader != rctx->vs_shader)) {
+ update_gs_block_state(rctx, rctx->gs_shader != NULL);
+
+ if (rctx->gs_shader) {
+ r600_shader_select(ctx, rctx->gs_shader, &gs_dirty);
+ if (unlikely(!rctx->gs_shader->current))
+ return false;
+
+ if (rctx->chip_class >= EVERGREEN && !rctx->shader_stages.geom_enable) {
+ rctx->shader_stages.geom_enable = true;
+ rctx->shader_stages.atom.dirty = true;
+ }
+
+ /* gs_shader provides GS and VS (copy shader) */
+ if (unlikely(rctx->geometry_shader.shader != rctx->gs_shader->current)) {
+ update_shader_atom(ctx, &rctx->geometry_shader, rctx->gs_shader->current);
+ update_shader_atom(ctx, &rctx->vertex_shader, rctx->gs_shader->current->gs_copy_shader);
+ }
+
r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
+ if (unlikely(!rctx->vs_shader->current))
+ return false;
+
+ /* vs_shader is used as ES */
+ if (unlikely(rctx->export_shader.shader != rctx->vs_shader->current)) {
+ update_shader_atom(ctx, &rctx->export_shader, rctx->vs_shader->current);
+ }
+ } else {
+ if (unlikely(rctx->geometry_shader.shader)) {
+ update_shader_atom(ctx, &rctx->geometry_shader, NULL);
+ update_shader_atom(ctx, &rctx->export_shader, NULL);
+ rctx->shader_stages.geom_enable = false;
+ rctx->shader_stages.atom.dirty = true;
+ }
+ r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
if (unlikely(!rctx->vs_shader->current))
return false;
- rctx->vertex_shader.shader = rctx->vs_shader;
- rctx->vertex_shader.atom.dirty = true;
- r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo);
+ if (unlikely(rctx->vertex_shader.shader != rctx->vs_shader->current)) {
+ update_shader_atom(ctx, &rctx->vertex_shader, rctx->vs_shader->current);
- /* Update clip misc state. */
- if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
- rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
- rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl;
- rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write;
- rctx->clip_misc_state.atom.dirty = true;
+ /* Update clip misc state. */
+ if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
+ rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
+ rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl;
+ rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write;
+ rctx->clip_misc_state.atom.dirty = true;
+ }
}
}
@@ -1193,7 +1309,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (unlikely(!rctx->ps_shader->current))
return false;
- if (unlikely(ps_dirty || rctx->pixel_shader.shader != rctx->ps_shader)) {
+ if (unlikely(rctx->pixel_shader.shader != rctx->ps_shader->current)) {
if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
@@ -1215,9 +1331,9 @@ static bool r600_update_derived_state(struct r600_context *rctx)
r600_update_db_shader_control(rctx);
}
- if (!ps_dirty && rctx->ps_shader && rctx->rasterizer &&
+ if (unlikely(!ps_dirty && rctx->ps_shader && rctx->rasterizer &&
((rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable) ||
- (rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade))) {
+ (rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade)))) {
if (rctx->chip_class >= EVERGREEN)
evergreen_update_ps_state(ctx, rctx->ps_shader->current);
@@ -1225,11 +1341,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
r600_update_ps_state(ctx, rctx->ps_shader->current);
}
- rctx->pixel_shader.shader = rctx->ps_shader;
- rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw;
- rctx->pixel_shader.atom.dirty = true;
- r600_context_add_resource_size(ctx,
- (struct pipe_resource *)rctx->ps_shader->current->bo);
+ update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current);
}
/* on R600 we stuff masks + txq info into one constant buffer */
@@ -1268,6 +1380,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
rctx->blend_state.cso,
blend_disable);
}
+
return true;
}
@@ -1710,10 +1823,12 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
{
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
- struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader->current;
+ struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader;
- r600_emit_command_buffer(cs, &shader->command_buffer);
+ if (!shader)
+ return;
+ r600_emit_command_buffer(cs, &shader->command_buffer);
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, shader->bo, RADEON_USAGE_READ));
}
@@ -1723,6 +1838,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
{
rctx->context.create_fs_state = r600_create_ps_state;
rctx->context.create_vs_state = r600_create_vs_state;
+ rctx->context.create_gs_state = r600_create_gs_state;
rctx->context.create_vertex_elements_state = r600_create_vertex_fetch_shader;
rctx->context.bind_blend_state = r600_bind_blend_state;
rctx->context.bind_depth_stencil_alpha_state = r600_bind_dsa_state;
@@ -1732,6 +1848,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements;
rctx->context.bind_vertex_sampler_states = r600_bind_vs_sampler_states;
rctx->context.bind_vs_state = r600_bind_vs_state;
+ rctx->context.bind_gs_state = r600_bind_gs_state;
rctx->context.delete_blend_state = r600_delete_blend_state;
rctx->context.delete_depth_stencil_alpha_state = r600_delete_dsa_state;
rctx->context.delete_fs_state = r600_delete_ps_state;
@@ -1739,6 +1856,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
rctx->context.delete_sampler_state = r600_delete_sampler_state;
rctx->context.delete_vertex_elements_state = r600_delete_vertex_elements;
rctx->context.delete_vs_state = r600_delete_vs_state;
+ rctx->context.delete_gs_state = r600_delete_gs_state;
rctx->context.set_blend_color = r600_set_blend_color;
rctx->context.set_clip_state = r600_set_clip_state;
rctx->context.set_constant_buffer = r600_set_constant_buffer;
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index ad1b862fd5..c9197ed2f0 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -169,8 +169,10 @@ enum shader_target
{
TARGET_UNKNOWN,
TARGET_VS,
+ TARGET_ES,
TARGET_PS,
TARGET_GS,
+ TARGET_GS_COPY,
TARGET_COMPUTE,
TARGET_FETCH,
diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
index 9d76465b36..30effa8d28 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
@@ -137,7 +137,7 @@ void bc_dump::dump(cf_node& n) {
for (int k = 0; k < 4; ++k)
s << chans[n.bc.sel[k]];
- } else if (n.bc.op_ptr->flags & (CF_STRM | CF_RAT)) {
+ } else if (n.bc.op_ptr->flags & CF_MEM) {
static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
"WRITE_IND_ACK"};
fill_to(s, 18);
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index c56c866baf..fc72a93397 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -64,7 +64,7 @@ int bc_finalizer::run() {
// workaround for some problems on r6xx/7xx
// add ALU NOP to each vertex shader
- if (!ctx.is_egcm() && sh.target == TARGET_VS) {
+ if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
alu_group_node *g = sh.create_alu_group();
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 67e6c3a582..24c4854225 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -58,7 +58,10 @@ int bc_parser::decode() {
if (pshader) {
switch (bc->type) {
case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break;
- case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break;
+ case TGSI_PROCESSOR_VERTEX:
+ t = pshader->vs_as_es ? TARGET_ES : TARGET_VS;
+ break;
+ case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break;
case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break;
default: assert(!"unknown shader target"); return -1; break;
}
@@ -134,8 +137,12 @@ int bc_parser::parse_decls() {
}
}
- if (sh->target == TARGET_VS)
+ if (sh->target == TARGET_VS || sh->target == TARGET_ES)
sh->add_input(0, 1, 0x0F);
+ else if (sh->target == TARGET_GS) {
+ sh->add_input(0, 1, 0x0F);
+ sh->add_input(1, 1, 0x0F);
+ }
bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
&& sh->target == TARGET_PS;
diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp
index 9fc47ae404..d6182e0946 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -435,6 +435,7 @@ std::string shader::get_full_target_name() {
const char* shader::get_shader_target_name() {
switch (target) {
case TARGET_VS: return "VS";
+ case TARGET_ES: return "ES";
case TARGET_PS: return "PS";
case TARGET_GS: return "GS";
case TARGET_COMPUTE: return "COMPUTE";