summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute.c 23
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.c 3
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h 3
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 250
-rw-r--r-- src/gallium/drivers/r600/sb/sb_core.cpp 4
5 files changed, 282 insertions, 1 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index f76fc9cc25..dad3e8a37a 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -547,6 +547,29 @@ static void evergreen_launch_grid(
unsigned sb_disasm = use_sb ||
(ctx->screen->debug_flags & DBG_SB_DISASM);
+ if (dump && r600_debug_check_fake_ctx(ctx)) {
+
+ struct r600_bytecode fake_bc;
+
+ r600_bytecode_init(&fake_bc, ctx->fake_context->chip_class,
+ ctx->fake_context->family,
+ ctx->screen->has_compressed_msaa_texturing);
+
+
+ fake_bc.type = TGSI_PROCESSOR_COMPUTE;
+ fake_bc.isa = ctx->fake_context->isa;
+ r600_llvm_compile(mod, ctx->fake_context->family, &fake_bc,
+ &use_kill, dump);
+
+ if (dump && !sb_disasm) {
+ r600_bytecode_disasm(&fake_bc);
+ } else if ((dump && sb_disasm) || use_sb) {
+ if (r600_sb_bytecode_process(ctx->fake_context, &fake_bc,
+ NULL, dump, use_sb))
+ R600_ERR("r600_sb_bytecode_process failed!\n");
+ }
+ }
+
r600_bytecode_init(bc, ctx->chip_class, ctx->family,
ctx->screen->has_compressed_msaa_texturing);
bc->type = TGSI_PROCESSOR_COMPUTE;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 49abf50487..f0c0a7e3ff 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -325,6 +325,9 @@ static void r600_destroy_context(struct pipe_context *context)
r600_sb_context_destroy(rctx->sb_context);
+ if (rctx->fake_context)
+ r600_sb_context_destroy(rctx->fake_context->sb_context);
+
pipe_resource_reference((struct pipe_resource**)&rctx->dummy_cmask, NULL);
pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 349a6cb6cf..5cccdb5d3f 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -643,6 +643,8 @@ struct r600_context {
void *sb_context;
struct r600_isa *isa;
+
+ struct r600_context *fake_context;
};
static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
@@ -765,6 +767,7 @@ void r600_init_context_resource_functions(struct r600_context *r600);
int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
struct r600_shader_key key);
+int r600_debug_check_fake_ctx(struct r600_context *rctx);
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index dc44faee36..ed0d4fbcc9 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -34,11 +34,14 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_text.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include <stdio.h>
#include <errno.h>
+#include <ctype.h>
+
/* CAYMAN notes
Why CAYMAN got loops for lots of instructions is explained here.
@@ -131,6 +134,206 @@ static void r600_dump_streamout(struct pipe_stream_output_info *so)
}
}
+// Loads a TGSI text dump from 'filename' and installs the parsed tokens in
+// sel->tokens. Not always reliable because some information (e.g. the
+// precision of immediates) may be lost in the tgsi dump, but it allows
+// initializing the corresponding shader states (such as uses_kill) before
+// loading the exact source bytecode with r600_debug_load_bytecode.
+//
+// On any failure (file cannot be opened or read, file too large, out of
+// memory, parse error) a message is printed and sel is left untouched.
+// NOTE(review): the previous sel->tokens pointer is overwritten without being
+// freed because its ownership is not visible from here - confirm.
+
+static void r600_debug_load_tgsi(struct r600_pipe_shader_selector *sel,
+				 const char *filename) {
+
+#define TSIZE 16384
+#define NTOKENS 4096
+
+	FILE *f;
+	struct tgsi_token *tokens;
+	char text[TSIZE];
+	unsigned rd;
+
+	f = fopen(filename, "r");
+	if (!f) {
+		printf("failed to open %s\n", filename);
+		return;
+	}
+
+	// One byte is reserved for the terminator: tgsi_text_translate() is
+	// given no text length, so it presumably relies on a terminating NUL.
+	rd = fread(text, 1, TSIZE - 1, f);
+	if (ferror(f)) {
+		printf("failed to read %s\n", filename);
+		fclose(f);
+		return;
+	}
+	// A completely filled buffer is only acceptable if the file ends here.
+	if (rd == TSIZE - 1 && fgetc(f) != EOF) {
+		printf("%s is too large (max %d bytes)\n", filename, TSIZE - 1);
+		fclose(f);
+		return;
+	}
+	text[rd] = '\0';
+
+	printf("loaded %u bytes from %s\n", rd, filename);
+	fclose(f);
+
+	tokens = calloc(NTOKENS, sizeof(*tokens));
+	if (!tokens) {
+		printf("out of memory for tgsi tokens\n");
+		return;
+	}
+
+	if (!tgsi_text_translate(text, tokens, NTOKENS)) {
+		printf("tgsi_text_translate failed\n");
+		// Don't hand a half-translated token stream to the selector.
+		free(tokens);
+		return;
+	}
+
+	sel->tokens = tokens;
+
+	printf("tgsi loaded...\n");
+
+#undef NTOKENS
+#undef TSIZE
+}
+
+// Parses one whitespace-delimited unsigned number in the given base (up to
+// 16; the callers use 10 and 16) starting at p, after skipping leading
+// whitespace. The value is stored in *n and the returned pointer addresses
+// the character that ended the token (whitespace or the terminating NUL).
+// Malformed input - no token at all, or a character that is not a valid
+// digit in 'base' - is fatal: abort() is called.
+static char* lbc_num(char *p, int base, uint32_t *n) {
+
+	uint32_t r = 0;
+
+	// <ctype.h> functions require values in the unsigned char range.
+	while (*p && isspace((unsigned char)*p))
+		p++;
+
+	// Nothing left to parse.
+	if (!*p)
+		abort();
+
+	while (*p && !isspace((unsigned char)*p)) {
+		int c = toupper((unsigned char)*p++);
+		int d;
+
+		if (isdigit(c))
+			d = c - '0';
+		else if (c >= 'A' && c <= 'F')
+			d = c - 'A' + 10;
+		else
+			abort();
+
+		// Valid digits are 0 .. base-1; this also rejects hex letters
+		// when parsing decimal.
+		if (d >= base)
+			abort();
+
+		r = r * base + d;
+	}
+
+	*n = r;
+	return p;
+}
+
+// Returns 1 if the first 8 characters at p are all hexadecimal digits (i.e.
+// p starts with a 32-bit word as printed by "%08X"), 0 otherwise. A string
+// shorter than 8 characters yields 0; characters past the eighth are ignored.
+static int lbc_ishex32(char *p) {
+	int k;
+
+	for (k = 0; k < 8; ++k) {
+		// The terminating NUL is not a hex digit, so short strings
+		// stop here without reading past their end. The cast keeps
+		// the argument in the range <ctype.h> requires.
+		if (!isxdigit((unsigned char)p[k]))
+			return 0;
+	}
+	return 1;
+}
+
+// Loads shader bytecode from an sb shader dump into ps->shader.bc.
+// The file should start with "===== SHADER #nnn" and end with
+// "===== SHADER_END".
+// This allows diagnosing issues with sb by loading the exact source bytecode
+// provided by a user's shader dump, which is useful because loading a tgsi
+// dump doesn't always reproduce the exact bytecode, e.g. due to compiler
+// changes or information lost in the tgsi dump (such as the precision of
+// immediates).
+//
+// The second line is expected to hold the shader size in dwords at offset 6;
+// every following line up to "===== SHADER_END" is expected to look like
+// "<decimal dword index>  <hex dword> <hex dword> ...". Every line is echoed
+// to stdout; lines that do not start with a decimal index are otherwise
+// ignored.
+//
+// A file that cannot be opened and an allocation failure are reported and the
+// function returns; a malformed header or a dword index beyond the declared
+// size is fatal (abort()).
+//
+// NOTE doesn't init other bytecode states like uses_kill, so loading the
+// corresponding tgsi dump and translating it may be required to init those
+// states before loading bytecode with this function.
+//
+// TODO parse and init exact ngpr in bytecode struct, though it worked so far
+
+static void r600_debug_load_bytecode(struct r600_pipe_shader *ps,
+				     const char *filename) {
+
+#define TSIZE 65536
+
+	FILE *f;
+	char text[TSIZE];
+	uint32_t a, u, dw;
+	struct r600_bytecode *bc = &ps->shader.bc;
+
+	f = fopen(filename, "r");
+	if (!f) {
+		printf("failed to open %s\n", filename);
+		return;
+	}
+
+	if (!fgets(text, TSIZE, f) || strncmp(text, "===== SHADER", 12))
+		abort();
+
+	// The size line must be long enough to have a character at offset 6,
+	// where the dword count is parsed from.
+	if (!fgets(text, TSIZE, f) || strlen(text) < 7)
+		abort();
+	lbc_num(text + 6, 10, &dw);
+
+	printf("load_bytecode: shader size (dw): %u\n", dw);
+
+	if (dw > bc->ndw) {
+		free(bc->bytecode);
+		// calloc zero-fills and checks the size multiplication.
+		bc->bytecode = calloc(dw, 4);
+		if (!bc->bytecode) {
+			bc->ndw = 0;
+			printf("out of memory for %u dwords of bytecode\n", dw);
+			fclose(f);
+			return;
+		}
+	} else if (dw) {
+		memset(bc->bytecode, 0, (size_t)dw * 4);
+	}
+
+	bc->ndw = dw;
+
+	// Loop on fgets() itself so a failed read never reprocesses stale data.
+	while (fgets(text, TSIZE, f)) {
+		char *p = text;
+
+		// File contents must never be used as a format string.
+		fputs(text, stdout);
+
+		if (!strncmp(text, "===== SHADER_END", 16))
+			break;
+
+		// Only lines starting with a decimal dword index carry data.
+		while (*p && isspace((unsigned char)*p))
+			p++;
+		if (!isdigit((unsigned char)*p))
+			continue;
+
+		p = lbc_num(p, 10, &a);
+
+		// Step over the two characters separating the index from the
+		// first dword, but never past the end of the line.
+		if (!p[0] || !p[1])
+			continue;
+		p += 2;
+
+		while (lbc_ishex32(p)) {
+			p = lbc_num(p, 16, &u);
+
+			// The buffer holds exactly dw dwords.
+			if (a >= dw) {
+				fprintf(stderr, "load_bytecode: dword index %u "
+					"is out of range\n", a);
+				abort();
+			}
+
+			printf(" [%u] = %08X\n", a, u);
+			bc->bytecode[a] = u;
+			a++;
+
+			// Skip the single separator before the next dword.
+			if (!*p)
+				break;
+			p++;
+		}
+	}
+
+	fclose(f);
+
+	printf("bytecode loaded...\n");
+
+#undef TSIZE
+}
+
+// Checks for, and on first use creates, the fake context of rctx.
+// The R600_FAKE environment variable is used to enable it and to choose the
+// chip class: R600_FAKE=6 means r6xx, 7 - r7xx, 8 - evergreen, 9 - cayman;
+// 0 (the default passed to debug_get_num_option) leaves it disabled.
+// The exact chip family for each class is hardcoded below.
+// Any other value, an allocation failure or an isa init failure is fatal
+// (abort()).
+//
+// Returns nonzero if rctx has a fake context, zero otherwise.
+int r600_debug_check_fake_ctx(struct r600_context *rctx) {
+
+	// The option is read once and cached for every context.
+	static int fake_class = -1;
+	struct r600_context *fake;
+
+	if (rctx->fake_context)
+		return 1;
+
+	if (fake_class == -1)
+		fake_class = debug_get_num_option("R600_FAKE", 0);
+
+	if (!fake_class)
+		return 0;
+
+	fake = calloc(1, sizeof(*fake));
+	if (!fake)
+		abort();
+
+	fake->screen = malloc(sizeof(*fake->screen));
+	if (!fake->screen)
+		abort();
+
+	// Start from a copy of the real screen; chip identification is
+	// overridden below.
+	memcpy(fake->screen, rctx->screen, sizeof(*fake->screen));
+
+	switch (fake_class) {
+	case 6:
+		fake->chip_class = R600;
+		fake->family = CHIP_R600;
+		break;
+	case 7:
+		fake->chip_class = R700;
+		fake->family = CHIP_RV770;
+		break;
+	case 8:
+		fake->chip_class = EVERGREEN;
+		fake->family = CHIP_REDWOOD;
+		break;
+	case 9:
+		fake->chip_class = CAYMAN;
+		fake->family = CHIP_CAYMAN;
+		break;
+	default:
+		abort();
+	}
+
+	fake->screen->chip_class = fake->chip_class;
+	fake->screen->family = fake->family;
+
+	fake->isa = calloc(1, sizeof(*fake->isa));
+	if (!fake->isa || r600_isa_init(fake, fake->isa))
+		abort();
+
+	rctx->fake_context = fake;
+	return 1;
+}
+
int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
struct r600_shader_key key)
@@ -143,6 +346,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
unsigned use_sb = rctx->screen->debug_flags & DBG_SB;
unsigned sb_disasm = use_sb || (rctx->screen->debug_flags & DBG_SB_DISASM);
+// r600_debug_load_tgsi(sel, "/home/vg/tgsi_shader_dump");
+
shader->shader.bc.isa = rctx->isa;
if (dump) {
@@ -153,6 +358,49 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
r600_dump_streamout(&sel->so);
}
}
+
+ if (dump && r600_debug_check_fake_ctx(rctx)) {
+
+ struct r600_pipe_shader fake_shader;
+ memcpy(&fake_shader, shader, sizeof(fake_shader));
+
+ fake_shader.shader.bc.isa = rctx->fake_context->isa;
+
+ r = r600_shader_from_tgsi(rctx->fake_context->screen,
+ &fake_shader, key);
+ if (r) {
+ R600_ERR("fakectx: translation from TGSI failed !\n");
+ return r;
+ }
+
+ if (!fake_shader.shader.bc.bytecode) {
+ r = r600_bytecode_build(&fake_shader.shader.bc);
+ if (r) {
+ R600_ERR("fakectx: building bytecode failed !\n");
+ return r;
+ }
+ }
+
+// r600_debug_load_bytecode(&fake_shader, "/home/vg/bytecode_dump");
+
+ if (dump || use_sb) {
+ r = r600_sb_bytecode_process(rctx->fake_context,
+ &fake_shader.shader.bc,
+ &fake_shader.shader,
+ dump, use_sb);
+ if (r) {
+ R600_ERR("fakectx: building optimized bytecode failed !\n");
+ return r;
+ }
+ }
+
+ }
+
+ // if r600_debug_load_tgsi was used above, further execution
+ // with unexpected shader probably doesn't make sense.
+
+ // abort();
+
r = r600_shader_from_tgsi(rctx->screen, shader, key);
if (r) {
R600_ERR("translation from TGSI failed !\n");
@@ -169,6 +417,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
R600_ERR("building bytecode failed !\n");
return r;
}
+
}
if (dump && !sb_disasm) {
@@ -221,6 +470,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
default:
return -EINVAL;
}
+// abort();
return 0;
}
diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp
index d907508eb2..c69247a756 100644
--- a/src/gallium/drivers/r600/sb/sb_core.cpp
+++ b/src/gallium/drivers/r600/sb/sb_core.cpp
@@ -77,7 +77,8 @@ void r600_sb_context_destroy(void * sctx) {
sb_context *ctx = static_cast<sb_context*>(sctx);
if (sb_context::dump_stat) {
- sblog << "\ncontext src stats: ";
+ sblog << "\ncontext stats for: " << ctx->get_hw_chip_name() << "\n";
+ sblog << "context src stats: ";
ctx->src_stats.dump();
sblog << "context opt stats: ";
ctx->opt_stats.dump();
@@ -268,6 +269,7 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
sh->opt_stats.ndw = bc->ndw;
sh->collect_stats(true);
+ sblog << "shader stats for " << ctx->get_hw_chip_name() << "\n";
sblog << "src stats: ";
sh->src_stats.dump();
sblog << "opt stats: ";