diff options
author | Vadim Girlin <vadimgirlin@gmail.com> | 2013-04-14 01:26:51 +0400 |
---|---|---|
committer | Vadim Girlin <vadimgirlin@gmail.com> | 2013-04-14 01:43:46 +0400 |
commit | eb0fc91055c08c0209f15da4e287e89c63d73982 (patch) | |
tree | 4803a8382b22a19a82ed83bd8da6474966e7dce5 | |
parent | 0f2705219b43700ca78f972b61366c7eb57514a8 (diff) |
initial support for compute shaders (tested with bfgminer only) [ref: r600-sb-bfgminer]
Use R600_SB_CL=1 to enable, disabled by default.
-rw-r--r-- | src/gallium/drivers/r600/r600_isa.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_builder.cpp | 28 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_decoder.cpp | 27 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_dump.cpp | 6 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 37 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 51 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_parser.h | 18 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_core.cpp | 37 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_public.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_shader.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_shader.h | 3 |
13 files changed, 174 insertions, 66 deletions
diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h index 89f6dd5d3d..8cccc9d0d7 100644 --- a/src/gallium/drivers/r600/r600_isa.h +++ b/src/gallium/drivers/r600/r600_isa.h @@ -142,7 +142,9 @@ enum cf_op_flags CF_EMIT = (1<<11), CF_STRM = (1<<12), /* MEM_STREAM* */ - CF_LOOP_START = (1<<13) + CF_RAT = (1<<13), /* MEM_RAT* */ + + CF_LOOP_START = (1<<14) }; /* ALU instruction info */ @@ -641,14 +643,14 @@ static const struct cf_op_info cf_op_table[] = { {"EXPORT_DONE", { 0x28, 0x28, 0x54, 0x54 }, CF_EXP }, {"MEM_EXPORT", { -1, 0x3A, 0x55, 0x55 }, CF_MEM }, - {"MEM_RAT", { -1, -1, 0x56, 0x56 }, CF_MEM }, - {"MEM_RAT_NOCACHE", { -1, -1, 0x57, 0x57 }, CF_MEM }, + {"MEM_RAT", { -1, -1, 0x56, 0x56 }, CF_MEM | CF_RAT }, + {"MEM_RAT_NOCACHE", { -1, -1, 0x57, 0x57 }, CF_MEM | CF_RAT }, {"MEM_RING1", { -1, -1, 0x58, 0x58 }, CF_MEM }, {"MEM_RING2", { -1, -1, 0x59, 0x59 }, CF_MEM }, {"MEM_RING3", { -1, -1, 0x5A, 0x5A }, CF_MEM }, {"MEM_MEM_COMBINED", { -1, -1, 0x5B, 0x5B }, CF_MEM }, - {"MEM_RAT_COMBINED_NOCACHE", { -1, -1, 0x5C, 0x5C }, CF_MEM }, - {"MEM_RAT_COMBINED", { -1, -1, -1, 0x5D }, CF_MEM }, /* ??? not in cayman isa doc */ + {"MEM_RAT_COMBINED_NOCACHE", { -1, -1, 0x5C, 0x5C }, CF_MEM | CF_RAT }, + {"MEM_RAT_COMBINED", { -1, -1, -1, 0x5D }, CF_MEM | CF_RAT }, /* ??? not in cayman isa doc */ {"EXPORT_DONE_END", { -1, -1, -1, 0x5E }, CF_EXP }, /* ??? 
not in cayman isa doc */ diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8dc9d9b092..5611966f5a 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -173,7 +173,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, } if (use_sb) { - r = r600_sb_bytecode_process(rctx, shader); + r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader); if (r) { R600_ERR("building optimized bytecode failed !\n"); return r; @@ -304,6 +304,7 @@ int r600_compute_shader_create(struct pipe_context * ctx, struct r600_context *r600_ctx = (struct r600_context *)ctx; unsigned char * bytes; unsigned byte_count; + static int use_sb = -1; struct r600_shader_ctx shader_ctx; bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0; @@ -318,9 +319,19 @@ int r600_compute_shader_create(struct pipe_context * ctx, cm_bytecode_add_cf_end(shader_ctx.bc); } r600_bytecode_build(shader_ctx.bc); + if (dump) { r600_bytecode_disasm(shader_ctx.bc); } + + if (use_sb == -1) + use_sb = debug_get_num_option("R600_SB_CL", 0); + + if (use_sb) { + if (r600_sb_bytecode_process(r600_ctx, shader_ctx.bc, NULL)) + R600_ERR("r600-sb: compute shader processing error\n"); + } + free(bytes); return 1; } diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h index 2af86ea172..882ada4131 100644 --- a/src/gallium/drivers/r600/sb/sb_bc.h +++ b/src/gallium/drivers/r600/sb/sb_bc.h @@ -346,6 +346,10 @@ struct bc_cf { unsigned array_size:12; unsigned comp_mask:4; + unsigned rat_id:4; + unsigned rat_inst:6; + unsigned rat_index_mode:2; + void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); } bool is_alu_extended() { diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp index b9c05e6aa0..e62e007774 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp @@ -274,14 
+274,28 @@ int bc_builder::build_cf_exp(cf_node* n) { const bc_cf &bc = n->bc; const cf_op_info *cfop = bc.op_ptr; - bb << CF_ALLOC_EXPORT_WORD0_ALL() - .ARRAY_BASE(bc.array_base) - .ELEM_SIZE(bc.elem_size) - .INDEX_GPR(bc.index_gpr) - .RW_GPR(bc.rw_gpr) - .RW_REL(bc.rw_rel) - .TYPE(bc.type); + if (cfop->flags & CF_RAT) { + assert(ctx.is_egcm()); + + bb << CF_ALLOC_EXPORT_WORD0_RAT_EGCM() + .ELEM_SIZE(bc.elem_size) + .INDEX_GPR(bc.index_gpr) + .RAT_ID(bc.rat_id) + .RAT_INDEX_MODE(bc.rat_index_mode) + .RAT_INST(bc.rat_inst) + .RW_GPR(bc.rw_gpr) + .RW_REL(bc.rw_rel) + .TYPE(bc.type); + } else { + bb << CF_ALLOC_EXPORT_WORD0_ALL() + .ARRAY_BASE(bc.array_base) + .ELEM_SIZE(bc.elem_size) + .INDEX_GPR(bc.index_gpr) + .RW_GPR(bc.rw_gpr) + .RW_REL(bc.rw_rel) + .TYPE(bc.type); + } if (cfop->flags & CF_EXP) { diff --git a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp index d26b6b3bb6..314ad3706e 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp @@ -236,13 +236,26 @@ int bc_decoder::decode_cf_mem(unsigned & i, bc_cf& bc) { uint32_t dw1 = dw[i++]; assert(i <= ndw); - CF_ALLOC_EXPORT_WORD0_ALL w0(dw0); - bc.array_base = w0.get_ARRAY_BASE(); - bc.elem_size = w0.get_ELEM_SIZE(); - bc.index_gpr = w0.get_INDEX_GPR(); - bc.rw_gpr = w0.get_RW_GPR(); - bc.rw_rel = w0.get_RW_REL(); - bc.type = w0.get_TYPE(); + if (!(bc.op_ptr->flags & CF_RAT)) { + CF_ALLOC_EXPORT_WORD0_ALL w0(dw0); + bc.array_base = w0.get_ARRAY_BASE(); + bc.elem_size = w0.get_ELEM_SIZE(); + bc.index_gpr = w0.get_INDEX_GPR(); + bc.rw_gpr = w0.get_RW_GPR(); + bc.rw_rel = w0.get_RW_REL(); + bc.type = w0.get_TYPE(); + } else { + assert(ctx.is_egcm()); + CF_ALLOC_EXPORT_WORD0_RAT_EGCM w0(dw0); + bc.elem_size = w0.get_ELEM_SIZE(); + bc.index_gpr = w0.get_INDEX_GPR(); + bc.rw_gpr = w0.get_RW_GPR(); + bc.rw_rel = w0.get_RW_REL(); + bc.type = w0.get_TYPE(); + bc.rat_id = w0.get_RAT_ID(); + bc.rat_inst = 
w0.get_RAT_INST(); + bc.rat_index_mode = w0.get_RAT_INDEX_MODE(); + } if (ctx.is_evergreen()) { CF_ALLOC_EXPORT_WORD1_BUF_EG w1(dw1); diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp index b28be2c6c3..bae9f66e4b 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp @@ -132,7 +132,7 @@ void bc_dump::dump(cf_node& n) { for (int k = 0; k < 4; ++k) s << chans[n.bc.sel[k]]; - } else if (n.bc.op_ptr->flags & CF_STRM) { + } else if (n.bc.op_ptr->flags & (CF_STRM | CF_RAT)) { static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", "WRITE_IND_ACK"}; fill_to(s, 18); @@ -142,6 +142,10 @@ void bc_dump::dump(cf_node& n) { for (int k = 0; k < 4; ++k) s << ((n.bc.comp_mask & (1 << k)) ? chans[k] : '_'); + if ((n.bc.op_ptr->flags & CF_RAT) && (n.bc.type & 1)) { + s << ", @R" << n.bc.index_gpr << ".xyz"; + } + s << " ES:" << n.bc.elem_size; } else { diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp index 941726651b..bd7e825427 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp @@ -691,6 +691,43 @@ void bc_finalizer::finalize_cf(cf_node* c) { c->bc.rw_gpr = reg >= 0 ? reg : 0; c->bc.comp_mask = mask; + + if ((flags & CF_RAT) && (c->bc.type & 1)) { + + reg = -1; + + for (unsigned chan = 0; chan < 4; ++chan) { + value *v = c->src[4 + chan]; + if (!v || v->is_undef()) + continue; + + if (!v->is_any_gpr() || v->gpr.chan() != chan) { + cerr << "invalid source operand " << chan << " "; + dump::dump_op(c); + cerr << "\n"; + abort(); + } + unsigned vreg = v->gpr.sel(); + if (reg == -1) + reg = vreg; + else if ((unsigned)reg != vreg) { + cerr << "invalid source operand " << chan << " "; + dump::dump_op(c); + cerr << "\n"; + abort(); + } + } + + assert(reg >= 0); + + if (reg >= 0) + update_ngpr(reg); + + c->bc.index_gpr = reg >= 0 ? 
reg : 0; + } + + + } else { #if 0 diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index 017046a88a..0f642161a0 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -53,15 +53,17 @@ using std::cerr; int bc_parser::parse() { - dw = pipe_shader->shader.bc.bytecode; - bc_ndw = pipe_shader->shader.bc.ndw; + dw = bc->bytecode; + bc_ndw = bc->ndw; + max_cf = 0; dec = new bc_decoder(ctx, dw, bc_ndw); shader_target t = TARGET_UNKNOWN; - switch (pipe_shader->shader.processor_type) { + switch (bc->type) { case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break; case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break; + case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break; default: assert(!"unknown shader target"); return -1; break; } @@ -82,42 +84,43 @@ int bc_parser::parse_shader() { bool eop = false; sh->init(); - parse_decls(); + + if (pshader) + parse_decls(); do { + eop = false; if ((r = parse_cf(i, eop))) return r; - } while (!eop); + } while (!eop || (i >> 1) <= max_cf); return 0; } int bc_parser::parse_decls() { - r600_shader &rs = pipe_shader->shader; - // sh->prepare_regs(rs.bc.ngpr); - if (rs.indirect_files & ~(1 << TGSI_FILE_CONSTANT)) { + if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) { #if SB_NO_ARRAY_INFO - sh->add_gpr_array(0, rs.bc.ngpr, 0b1111); + sh->add_gpr_array(0, pshader->bc.ngpr, 0b1111); #else - assert(rs.num_arrays); + assert(pshader->num_arrays); - if (rs.num_arrays) { + if (pshader->num_arrays) { - for (unsigned i = 0; i < rs.num_arrays; ++i) { - r600_shader_array &a = rs.arrays[i]; + for (unsigned i = 0; i < pshader->num_arrays; ++i) { + r600_shader_array &a = pshader->arrays[i]; sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); } } else { - sh->add_gpr_array(0, rs.bc.ngpr, 0b1111); + sh->add_gpr_array(0, pshader->bc.ngpr, 0b1111); } @@ -133,8 +136,8 @@ int bc_parser::parse_decls() { unsigned linear = 0, persp = 0, centroid 
= 1; - for (unsigned i = 0; i < rs.ninput; ++i) { - r600_shader_io & in = rs.input[i]; + for (unsigned i = 0; i < pshader->ninput; ++i) { + r600_shader_io & in = pshader->input[i]; bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid); sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0b1111); if (ps_interp && in.spi_sid) { @@ -237,7 +240,7 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) { cf->bc.end_of_program = eop; - } else if (flags & CF_STRM) { + } else if (flags & (CF_STRM | CF_RAT)) { assert(!cf->bc.rw_rel); unsigned burst_count = cf->bc.burst_count; @@ -256,6 +259,17 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) { sh->get_gpr_value(true, cf->bc.rw_gpr, s, false); } + if ((flags & CF_RAT) && (cf->bc.type & 1)) { // indexed write + cf->src.resize(8); + for(int s = 0; s < 3; ++s) { + cf->src[4 + s] = + sh->get_gpr_value(true, cf->bc.index_gpr, s, false); + } + + // FIXME probably we can relax it a bit + cf->flags |= NF_DONT_HOIST | NF_DONT_MOVE; + } + if (!burst_count--) break; @@ -271,6 +285,9 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) { } else if (cf->bc.op == CF_OP_CALL_FS) { sh->init_call_fs(cf); cf->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE; + } else if (flags & CF_BRANCH) { + if (cf->bc.addr > max_cf) + max_cf = cf->bc.addr; } eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END; diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.h b/src/gallium/drivers/r600/sb/sb_bc_parser.h index 03a5dcbf37..3060b91700 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.h +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.h @@ -26,15 +26,16 @@ #ifndef BC_PARSER_H_ #define BC_PARSER_H_ - +/* struct r600_context; -struct r600_pipe_shader; +struct r600_shader; +struct r600_bytecode; struct r600_bytecode_cf; struct r600_bytecode_alu; struct r600_bytecode_tex; struct r600_bytecode_vtx; - +*/ namespace r600_sb { class bc_parser { @@ -44,11 +45,14 @@ class bc_parser { bc_decoder *dec; - r600_pipe_shader *pipe_shader; + r600_bytecode *bc; 
+ r600_shader *pshader; uint32_t *dw; unsigned bc_ndw; + int max_cf; + shader *sh; int error; @@ -67,9 +71,9 @@ class bc_parser { public: - bc_parser(sb_context &sctx, r600_pipe_shader* p_shader) : - ctx(sctx), dec(0), pipe_shader(p_shader), - dw(0), bc_ndw(0), + bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : + ctx(sctx), dec(0), bc(bc), pshader(pshader), + dw(0), bc_ndw(0), max_cf(0), sh(0), error(0), last_region_id() { } int parse(); diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp index 14b5d1d3ce..4071b8d954 100644 --- a/src/gallium/drivers/r600/sb/sb_core.cpp +++ b/src/gallium/drivers/r600/sb/sb_core.cpp @@ -26,10 +26,17 @@ #define SB_RA_SCHED_CHECK DEBUG +extern "C" { +#include "os/os_time.h" +#include "r600_pipe.h" +#include "r600_shader.h" + +#include "sb_public.h" +} + #include <stack> #include <map> #include <iostream> -//#include <sstream> #include "sb_context.h" #include "sb_shader.h" @@ -57,14 +64,6 @@ #include "sb_bc_encoding.h" #include "sb_bc_builder.h" -extern "C" { -#include "os/os_time.h" -#include "r600_pipe.h" -#include "r600_shader.h" - -#include "sb_public.h" -} - using namespace r600_sb; using std::cerr; @@ -102,7 +101,8 @@ void r600_sb_context_destroy(void * sctx) { } int r600_sb_bytecode_process(struct r600_context *rctx, - struct r600_pipe_shader *pipe_shader) { + struct r600_bytecode *bc, + struct r600_shader *pshader) { int r = 0; static unsigned sh_idx = 0; @@ -142,7 +142,7 @@ int r600_sb_bytecode_process(struct r600_context *rctx, cerr << "sb: processing shader " << sh_idx << "\n"; ); - bc_parser parser(*ctx, pipe_shader); + bc_parser parser(*ctx, bc, pshader); if ((r = parser.parse())) { assert(0); @@ -221,16 +221,15 @@ int r600_sb_bytecode_process(struct r600_context *rctx, } if (sb_context::use_new_bytecode) { - bytecode &bc = builder.get_bytecode(); - r600_bytecode &old_bc = pipe_shader->shader.bc; + bytecode &nbc = builder.get_bytecode(); - 
free(old_bc.bytecode); - old_bc.ndw = bc.ndw(); - old_bc.bytecode = (uint32_t*) malloc(old_bc.ndw << 2); - bc.write_data(old_bc.bytecode); + free(bc->bytecode); + bc->ndw = nbc.ndw(); + bc->bytecode = (uint32_t*) malloc(bc->ndw << 2); + nbc.write_data(bc->bytecode); - old_bc.ngpr = sh->ngpr; - old_bc.nstack = sh->nstack; + bc->ngpr = sh->ngpr; + bc->nstack = sh->nstack; } else { SB_DUMP(0, cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; ); } diff --git a/src/gallium/drivers/r600/sb/sb_public.h b/src/gallium/drivers/r600/sb/sb_public.h index 986439f751..ff711d76b1 100644 --- a/src/gallium/drivers/r600/sb/sb_public.h +++ b/src/gallium/drivers/r600/sb/sb_public.h @@ -30,6 +30,7 @@ void r600_sb_context_destroy(void *sctx); int r600_sb_bytecode_process(struct r600_context *rctx, - struct r600_pipe_shader *pipe_shader); + struct r600_bytecode *bc, + struct r600_shader *pshader); #endif //R600_SB_H_ diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp index 37a20cc96f..d77e459908 100644 --- a/src/gallium/drivers/r600/sb/sb_shader.cpp +++ b/src/gallium/drivers/r600/sb/sb_shader.cpp @@ -505,6 +505,7 @@ const char* shader::get_shader_target_name() { case TARGET_VS: return "VS"; case TARGET_PS: return "PS"; case TARGET_GS: return "GS"; + case TARGET_COMPUTE: return "COMPUTE"; default: return "INVALID_TARGET"; } diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h index d0d90f26a6..470f972148 100644 --- a/src/gallium/drivers/r600/sb/sb_shader.h +++ b/src/gallium/drivers/r600/sb/sb_shader.h @@ -60,7 +60,8 @@ enum shader_target TARGET_UNKNOWN, TARGET_VS, TARGET_PS, - TARGET_GS + TARGET_GS, + TARGET_COMPUTE }; typedef std::vector<region_node*> regions_vec; |