summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Vadim Girlin <vadimgirlin@gmail.com> 2013-04-14 01:26:51 +0400
committer: Vadim Girlin <vadimgirlin@gmail.com> 2013-04-14 01:43:46 +0400
commit: eb0fc91055c08c0209f15da4e287e89c63d73982 (patch)
tree: 4803a8382b22a19a82ed83bd8da6474966e7dce5
parent: 0f2705219b43700ca78f972b61366c7eb57514a8 (diff)
initial support for compute shaders (tested with bfgminer only) [r600-sb-bfgminer]
Use R600_SB_CL=1 to enable, disabled by default.
-rw-r--r-- src/gallium/drivers/r600/r600_isa.h 12
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 13
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc.h 4
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_builder.cpp 28
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_decoder.cpp 27
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_dump.cpp 6
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 37
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_parser.cpp 51
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_parser.h 18
-rw-r--r-- src/gallium/drivers/r600/sb/sb_core.cpp 37
-rw-r--r-- src/gallium/drivers/r600/sb/sb_public.h 3
-rw-r--r-- src/gallium/drivers/r600/sb/sb_shader.cpp 1
-rw-r--r-- src/gallium/drivers/r600/sb/sb_shader.h 3
13 files changed, 174 insertions, 66 deletions
diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
index 89f6dd5d3d..8cccc9d0d7 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -142,7 +142,9 @@ enum cf_op_flags
CF_EMIT = (1<<11),
CF_STRM = (1<<12), /* MEM_STREAM* */
- CF_LOOP_START = (1<<13)
+ CF_RAT = (1<<13), /* MEM_RAT* */
+
+ CF_LOOP_START = (1<<14)
};
/* ALU instruction info */
@@ -641,14 +643,14 @@ static const struct cf_op_info cf_op_table[] = {
{"EXPORT_DONE", { 0x28, 0x28, 0x54, 0x54 }, CF_EXP },
{"MEM_EXPORT", { -1, 0x3A, 0x55, 0x55 }, CF_MEM },
- {"MEM_RAT", { -1, -1, 0x56, 0x56 }, CF_MEM },
- {"MEM_RAT_NOCACHE", { -1, -1, 0x57, 0x57 }, CF_MEM },
+ {"MEM_RAT", { -1, -1, 0x56, 0x56 }, CF_MEM | CF_RAT },
+ {"MEM_RAT_NOCACHE", { -1, -1, 0x57, 0x57 }, CF_MEM | CF_RAT },
{"MEM_RING1", { -1, -1, 0x58, 0x58 }, CF_MEM },
{"MEM_RING2", { -1, -1, 0x59, 0x59 }, CF_MEM },
{"MEM_RING3", { -1, -1, 0x5A, 0x5A }, CF_MEM },
{"MEM_MEM_COMBINED", { -1, -1, 0x5B, 0x5B }, CF_MEM },
- {"MEM_RAT_COMBINED_NOCACHE", { -1, -1, 0x5C, 0x5C }, CF_MEM },
- {"MEM_RAT_COMBINED", { -1, -1, -1, 0x5D }, CF_MEM }, /* ??? not in cayman isa doc */
+ {"MEM_RAT_COMBINED_NOCACHE", { -1, -1, 0x5C, 0x5C }, CF_MEM | CF_RAT },
+ {"MEM_RAT_COMBINED", { -1, -1, -1, 0x5D }, CF_MEM | CF_RAT }, /* ??? not in cayman isa doc */
{"EXPORT_DONE_END", { -1, -1, -1, 0x5E }, CF_EXP }, /* ??? not in cayman isa doc */
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 8dc9d9b092..5611966f5a 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -173,7 +173,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
}
if (use_sb) {
- r = r600_sb_bytecode_process(rctx, shader);
+ r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader);
if (r) {
R600_ERR("building optimized bytecode failed !\n");
return r;
@@ -304,6 +304,7 @@ int r600_compute_shader_create(struct pipe_context * ctx,
struct r600_context *r600_ctx = (struct r600_context *)ctx;
unsigned char * bytes;
unsigned byte_count;
+ static int use_sb = -1;
struct r600_shader_ctx shader_ctx;
bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0;
@@ -318,9 +319,19 @@ int r600_compute_shader_create(struct pipe_context * ctx,
cm_bytecode_add_cf_end(shader_ctx.bc);
}
r600_bytecode_build(shader_ctx.bc);
+
if (dump) {
r600_bytecode_disasm(shader_ctx.bc);
}
+
+ if (use_sb == -1)
+ use_sb = debug_get_num_option("R600_SB_CL", 0);
+
+ if (use_sb) {
+ if (r600_sb_bytecode_process(r600_ctx, shader_ctx.bc, NULL))
+ R600_ERR("r600-sb: compute shader processing error\n");
+ }
+
free(bytes);
return 1;
}
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index 2af86ea172..882ada4131 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -346,6 +346,10 @@ struct bc_cf {
unsigned array_size:12;
unsigned comp_mask:4;
+ unsigned rat_id:4;
+ unsigned rat_inst:6;
+ unsigned rat_index_mode:2;
+
void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
bool is_alu_extended() {
diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
index b9c05e6aa0..e62e007774 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
@@ -274,14 +274,28 @@ int bc_builder::build_cf_exp(cf_node* n) {
const bc_cf &bc = n->bc;
const cf_op_info *cfop = bc.op_ptr;
- bb << CF_ALLOC_EXPORT_WORD0_ALL()
- .ARRAY_BASE(bc.array_base)
- .ELEM_SIZE(bc.elem_size)
- .INDEX_GPR(bc.index_gpr)
- .RW_GPR(bc.rw_gpr)
- .RW_REL(bc.rw_rel)
- .TYPE(bc.type);
+ if (cfop->flags & CF_RAT) {
+ assert(ctx.is_egcm());
+
+ bb << CF_ALLOC_EXPORT_WORD0_RAT_EGCM()
+ .ELEM_SIZE(bc.elem_size)
+ .INDEX_GPR(bc.index_gpr)
+ .RAT_ID(bc.rat_id)
+ .RAT_INDEX_MODE(bc.rat_index_mode)
+ .RAT_INST(bc.rat_inst)
+ .RW_GPR(bc.rw_gpr)
+ .RW_REL(bc.rw_rel)
+ .TYPE(bc.type);
+ } else {
+ bb << CF_ALLOC_EXPORT_WORD0_ALL()
+ .ARRAY_BASE(bc.array_base)
+ .ELEM_SIZE(bc.elem_size)
+ .INDEX_GPR(bc.index_gpr)
+ .RW_GPR(bc.rw_gpr)
+ .RW_REL(bc.rw_rel)
+ .TYPE(bc.type);
+ }
if (cfop->flags & CF_EXP) {
diff --git a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
index d26b6b3bb6..314ad3706e 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
@@ -236,13 +236,26 @@ int bc_decoder::decode_cf_mem(unsigned & i, bc_cf& bc) {
uint32_t dw1 = dw[i++];
assert(i <= ndw);
- CF_ALLOC_EXPORT_WORD0_ALL w0(dw0);
- bc.array_base = w0.get_ARRAY_BASE();
- bc.elem_size = w0.get_ELEM_SIZE();
- bc.index_gpr = w0.get_INDEX_GPR();
- bc.rw_gpr = w0.get_RW_GPR();
- bc.rw_rel = w0.get_RW_REL();
- bc.type = w0.get_TYPE();
+ if (!(bc.op_ptr->flags & CF_RAT)) {
+ CF_ALLOC_EXPORT_WORD0_ALL w0(dw0);
+ bc.array_base = w0.get_ARRAY_BASE();
+ bc.elem_size = w0.get_ELEM_SIZE();
+ bc.index_gpr = w0.get_INDEX_GPR();
+ bc.rw_gpr = w0.get_RW_GPR();
+ bc.rw_rel = w0.get_RW_REL();
+ bc.type = w0.get_TYPE();
+ } else {
+ assert(ctx.is_egcm());
+ CF_ALLOC_EXPORT_WORD0_RAT_EGCM w0(dw0);
+ bc.elem_size = w0.get_ELEM_SIZE();
+ bc.index_gpr = w0.get_INDEX_GPR();
+ bc.rw_gpr = w0.get_RW_GPR();
+ bc.rw_rel = w0.get_RW_REL();
+ bc.type = w0.get_TYPE();
+ bc.rat_id = w0.get_RAT_ID();
+ bc.rat_inst = w0.get_RAT_INST();
+ bc.rat_index_mode = w0.get_RAT_INDEX_MODE();
+ }
if (ctx.is_evergreen()) {
CF_ALLOC_EXPORT_WORD1_BUF_EG w1(dw1);
diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
index b28be2c6c3..bae9f66e4b 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
@@ -132,7 +132,7 @@ void bc_dump::dump(cf_node& n) {
for (int k = 0; k < 4; ++k)
s << chans[n.bc.sel[k]];
- } else if (n.bc.op_ptr->flags & CF_STRM) {
+ } else if (n.bc.op_ptr->flags & (CF_STRM | CF_RAT)) {
static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
"WRITE_IND_ACK"};
fill_to(s, 18);
@@ -142,6 +142,10 @@ void bc_dump::dump(cf_node& n) {
for (int k = 0; k < 4; ++k)
s << ((n.bc.comp_mask & (1 << k)) ? chans[k] : '_');
+ if ((n.bc.op_ptr->flags & CF_RAT) && (n.bc.type & 1)) {
+ s << ", @R" << n.bc.index_gpr << ".xyz";
+ }
+
s << " ES:" << n.bc.elem_size;
} else {
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 941726651b..bd7e825427 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -691,6 +691,43 @@ void bc_finalizer::finalize_cf(cf_node* c) {
c->bc.rw_gpr = reg >= 0 ? reg : 0;
c->bc.comp_mask = mask;
+
+ if ((flags & CF_RAT) && (c->bc.type & 1)) {
+
+ reg = -1;
+
+ for (unsigned chan = 0; chan < 4; ++chan) {
+ value *v = c->src[4 + chan];
+ if (!v || v->is_undef())
+ continue;
+
+ if (!v->is_any_gpr() || v->gpr.chan() != chan) {
+ cerr << "invalid source operand " << chan << " ";
+ dump::dump_op(c);
+ cerr << "\n";
+ abort();
+ }
+ unsigned vreg = v->gpr.sel();
+ if (reg == -1)
+ reg = vreg;
+ else if ((unsigned)reg != vreg) {
+ cerr << "invalid source operand " << chan << " ";
+ dump::dump_op(c);
+ cerr << "\n";
+ abort();
+ }
+ }
+
+ assert(reg >= 0);
+
+ if (reg >= 0)
+ update_ngpr(reg);
+
+ c->bc.index_gpr = reg >= 0 ? reg : 0;
+ }
+
+
+
} else {
#if 0
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 017046a88a..0f642161a0 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -53,15 +53,17 @@ using std::cerr;
int bc_parser::parse() {
- dw = pipe_shader->shader.bc.bytecode;
- bc_ndw = pipe_shader->shader.bc.ndw;
+ dw = bc->bytecode;
+ bc_ndw = bc->ndw;
+ max_cf = 0;
dec = new bc_decoder(ctx, dw, bc_ndw);
shader_target t = TARGET_UNKNOWN;
- switch (pipe_shader->shader.processor_type) {
+ switch (bc->type) {
case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break;
case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break;
+ case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break;
default: assert(!"unknown shader target"); return -1; break;
}
@@ -82,42 +84,43 @@ int bc_parser::parse_shader() {
bool eop = false;
sh->init();
- parse_decls();
+
+ if (pshader)
+ parse_decls();
do {
+ eop = false;
if ((r = parse_cf(i, eop)))
return r;
- } while (!eop);
+ } while (!eop || (i >> 1) <= max_cf);
return 0;
}
int bc_parser::parse_decls() {
- r600_shader &rs = pipe_shader->shader;
-
// sh->prepare_regs(rs.bc.ngpr);
- if (rs.indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
+ if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
#if SB_NO_ARRAY_INFO
- sh->add_gpr_array(0, rs.bc.ngpr, 0b1111);
+ sh->add_gpr_array(0, pshader->bc.ngpr, 0b1111);
#else
- assert(rs.num_arrays);
+ assert(pshader->num_arrays);
- if (rs.num_arrays) {
+ if (pshader->num_arrays) {
- for (unsigned i = 0; i < rs.num_arrays; ++i) {
- r600_shader_array &a = rs.arrays[i];
+ for (unsigned i = 0; i < pshader->num_arrays; ++i) {
+ r600_shader_array &a = pshader->arrays[i];
sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
}
} else {
- sh->add_gpr_array(0, rs.bc.ngpr, 0b1111);
+ sh->add_gpr_array(0, pshader->bc.ngpr, 0b1111);
}
@@ -133,8 +136,8 @@ int bc_parser::parse_decls() {
unsigned linear = 0, persp = 0, centroid = 1;
- for (unsigned i = 0; i < rs.ninput; ++i) {
- r600_shader_io & in = rs.input[i];
+ for (unsigned i = 0; i < pshader->ninput; ++i) {
+ r600_shader_io & in = pshader->input[i];
bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0b1111);
if (ps_interp && in.spi_sid) {
@@ -237,7 +240,7 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
cf->bc.end_of_program = eop;
- } else if (flags & CF_STRM) {
+ } else if (flags & (CF_STRM | CF_RAT)) {
assert(!cf->bc.rw_rel);
unsigned burst_count = cf->bc.burst_count;
@@ -256,6 +259,17 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
sh->get_gpr_value(true, cf->bc.rw_gpr, s, false);
}
+ if ((flags & CF_RAT) && (cf->bc.type & 1)) { // indexed write
+ cf->src.resize(8);
+ for(int s = 0; s < 3; ++s) {
+ cf->src[4 + s] =
+ sh->get_gpr_value(true, cf->bc.index_gpr, s, false);
+ }
+
+ // FIXME probably we can relax it a bit
+ cf->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
+ }
+
if (!burst_count--)
break;
@@ -271,6 +285,9 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
} else if (cf->bc.op == CF_OP_CALL_FS) {
sh->init_call_fs(cf);
cf->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
+ } else if (flags & CF_BRANCH) {
+ if (cf->bc.addr > max_cf)
+ max_cf = cf->bc.addr;
}
eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.h b/src/gallium/drivers/r600/sb/sb_bc_parser.h
index 03a5dcbf37..3060b91700 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.h
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.h
@@ -26,15 +26,16 @@
#ifndef BC_PARSER_H_
#define BC_PARSER_H_
-
+/*
struct r600_context;
-struct r600_pipe_shader;
+struct r600_shader;
+struct r600_bytecode;
struct r600_bytecode_cf;
struct r600_bytecode_alu;
struct r600_bytecode_tex;
struct r600_bytecode_vtx;
-
+*/
namespace r600_sb {
class bc_parser {
@@ -44,11 +45,14 @@ class bc_parser {
bc_decoder *dec;
- r600_pipe_shader *pipe_shader;
+ r600_bytecode *bc;
+ r600_shader *pshader;
uint32_t *dw;
unsigned bc_ndw;
+ int max_cf;
+
shader *sh;
int error;
@@ -67,9 +71,9 @@ class bc_parser {
public:
- bc_parser(sb_context &sctx, r600_pipe_shader* p_shader) :
- ctx(sctx), dec(0), pipe_shader(p_shader),
- dw(0), bc_ndw(0),
+ bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
+ ctx(sctx), dec(0), bc(bc), pshader(pshader),
+ dw(0), bc_ndw(0), max_cf(0),
sh(0), error(0), last_region_id() { }
int parse();
diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp
index 14b5d1d3ce..4071b8d954 100644
--- a/src/gallium/drivers/r600/sb/sb_core.cpp
+++ b/src/gallium/drivers/r600/sb/sb_core.cpp
@@ -26,10 +26,17 @@
#define SB_RA_SCHED_CHECK DEBUG
+extern "C" {
+#include "os/os_time.h"
+#include "r600_pipe.h"
+#include "r600_shader.h"
+
+#include "sb_public.h"
+}
+
#include <stack>
#include <map>
#include <iostream>
-//#include <sstream>
#include "sb_context.h"
#include "sb_shader.h"
@@ -57,14 +64,6 @@
#include "sb_bc_encoding.h"
#include "sb_bc_builder.h"
-extern "C" {
-#include "os/os_time.h"
-#include "r600_pipe.h"
-#include "r600_shader.h"
-
-#include "sb_public.h"
-}
-
using namespace r600_sb;
using std::cerr;
@@ -102,7 +101,8 @@ void r600_sb_context_destroy(void * sctx) {
}
int r600_sb_bytecode_process(struct r600_context *rctx,
- struct r600_pipe_shader *pipe_shader) {
+ struct r600_bytecode *bc,
+ struct r600_shader *pshader) {
int r = 0;
static unsigned sh_idx = 0;
@@ -142,7 +142,7 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
cerr << "sb: processing shader " << sh_idx << "\n";
);
- bc_parser parser(*ctx, pipe_shader);
+ bc_parser parser(*ctx, bc, pshader);
if ((r = parser.parse())) {
assert(0);
@@ -221,16 +221,15 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
}
if (sb_context::use_new_bytecode) {
- bytecode &bc = builder.get_bytecode();
- r600_bytecode &old_bc = pipe_shader->shader.bc;
+ bytecode &nbc = builder.get_bytecode();
- free(old_bc.bytecode);
- old_bc.ndw = bc.ndw();
- old_bc.bytecode = (uint32_t*) malloc(old_bc.ndw << 2);
- bc.write_data(old_bc.bytecode);
+ free(bc->bytecode);
+ bc->ndw = nbc.ndw();
+ bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
+ nbc.write_data(bc->bytecode);
- old_bc.ngpr = sh->ngpr;
- old_bc.nstack = sh->nstack;
+ bc->ngpr = sh->ngpr;
+ bc->nstack = sh->nstack;
} else {
SB_DUMP(0, cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; );
}
diff --git a/src/gallium/drivers/r600/sb/sb_public.h b/src/gallium/drivers/r600/sb/sb_public.h
index 986439f751..ff711d76b1 100644
--- a/src/gallium/drivers/r600/sb/sb_public.h
+++ b/src/gallium/drivers/r600/sb/sb_public.h
@@ -30,6 +30,7 @@
void r600_sb_context_destroy(void *sctx);
int r600_sb_bytecode_process(struct r600_context *rctx,
- struct r600_pipe_shader *pipe_shader);
+ struct r600_bytecode *bc,
+ struct r600_shader *pshader);
#endif //R600_SB_H_
diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp
index 37a20cc96f..d77e459908 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -505,6 +505,7 @@ const char* shader::get_shader_target_name() {
case TARGET_VS: return "VS";
case TARGET_PS: return "PS";
case TARGET_GS: return "GS";
+ case TARGET_COMPUTE: return "COMPUTE";
default:
return "INVALID_TARGET";
}
diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h
index d0d90f26a6..470f972148 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.h
+++ b/src/gallium/drivers/r600/sb/sb_shader.h
@@ -60,7 +60,8 @@ enum shader_target
TARGET_UNKNOWN,
TARGET_VS,
TARGET_PS,
- TARGET_GS
+ TARGET_GS,
+ TARGET_COMPUTE
};
typedef std::vector<region_node*> regions_vec;