diff options
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 23 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 250 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_core.cpp | 4 |
5 files changed, 282 insertions, 1 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index f76fc9cc25..dad3e8a37a 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -547,6 +547,29 @@ static void evergreen_launch_grid( unsigned sb_disasm = use_sb || (ctx->screen->debug_flags & DBG_SB_DISASM); + if (dump && r600_debug_check_fake_ctx(ctx)) { + + struct r600_bytecode fake_bc; + + r600_bytecode_init(&fake_bc, ctx->fake_context->chip_class, + ctx->fake_context->family, + ctx->screen->has_compressed_msaa_texturing); + + + fake_bc.type = TGSI_PROCESSOR_COMPUTE; + fake_bc.isa = ctx->fake_context->isa; + r600_llvm_compile(mod, ctx->fake_context->family, &fake_bc, + &use_kill, dump); + + if (dump && !sb_disasm) { + r600_bytecode_disasm(&fake_bc); + } else if ((dump && sb_disasm) || use_sb) { + if (r600_sb_bytecode_process(ctx->fake_context, &fake_bc, + NULL, dump, use_sb)) + R600_ERR("r600_sb_bytecode_process failed!\n"); + } + } + r600_bytecode_init(bc, ctx->chip_class, ctx->family, ctx->screen->has_compressed_msaa_texturing); bc->type = TGSI_PROCESSOR_COMPUTE; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 49abf50487..f0c0a7e3ff 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -325,6 +325,9 @@ static void r600_destroy_context(struct pipe_context *context) r600_sb_context_destroy(rctx->sb_context); + if (rctx->fake_context) + r600_sb_context_destroy(rctx->fake_context->sb_context); + pipe_resource_reference((struct pipe_resource**)&rctx->dummy_cmask, NULL); pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 349a6cb6cf..5cccdb5d3f 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -643,6 +643,8 @@ struct r600_context { void *sb_context; struct r600_isa *isa; + + struct r600_context *fake_context; }; static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs, @@ -765,6 +767,7 @@ void r600_init_context_resource_functions(struct r600_context *r600); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, struct r600_shader_key key); +int r600_debug_check_fake_ctx(struct r600_context *rctx); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index dc44faee36..ed0d4fbcc9 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -34,11 +34,14 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_text.h" #include "util/u_memory.h" #include "util/u_math.h" #include <stdio.h> #include <errno.h> +#include <ctype.h> + /* CAYMAN notes Why CAYMAN got loops for lots of instructions is explained here. @@ -131,6 +134,206 @@ static void r600_dump_streamout(struct pipe_stream_output_info *so) } } +// Loads tgsi dump. Not always reliable because some information (precision of +// immediates) may be lost in the tgsi dump, but allows to init corresponding +// shader states (such as uses_kill) before loading exact source bytecode with +// r600_debug_load_bytecode. + +static void r600_debug_load_tgsi(struct r600_pipe_shader_selector *sel, + const char *filename) { + +#define TSIZE 16384 + + FILE *f; + struct tgsi_token* tokens; + char text[TSIZE]; + unsigned rd; + + f = fopen(filename, "r"); + assert(f); + + rd = fread(text, 1, TSIZE, f); + assert(rd < TSIZE); + + printf("loaded %d bytes from %s\n", rd, filename); + fclose(f); + + tokens = calloc(sizeof(struct tgsi_token), 4096); + + if (!tgsi_text_translate(text, tokens, 4096)) { + printf("tgsi_text_translate failed\n"); + } + + sel->tokens = tokens; + + printf("tgsi loaded...\n"); + +#undef TSIZE +} + +static char* lbc_num(char *p, int base, uint32_t *n) { + + uint32_t r = 0; + int c; + + while (*p && isspace(*p)) p++; + + if (!p) + abort(); + + while(*p && !isspace(*p)) { + int d; + c = toupper(*p++); + if (isdigit(c)) { + d = c - '0'; + if (d > base) + abort(); + } else if (base == 16 && c >= 'A' && c <= 'F') { + d = c - 'A' + 10; + } + + r = r * base + d; + } + + *n = r; + return p; +} + +static int lbc_ishex32(char *p) { + int k = 0; + for (k = 0; k < 8; ++k) { + if (!p[k]) + return 0; + int c = toupper(p[k]); + if (!(isdigit(c) || (c >= 'A' && c <= 'F'))) + return 0; + } + return 1; +} + +// Loads shader bytecode from sb shader dump, +// the file should start with "===== SHADER #nnn" and end with "===== SHADER_END". +// This allows to diagnose issues with sb by loading exact source bytecode +// provided by user's shader dump. +// This is useful because loading tgsi dump doesn't always allow to reproduce +// exact bytecode, e.g. due to compiler changes or lost information in tgsi +// dump (e.g. lost precision of immediates in tgsi dump) + +// NOTE doesn't init other bytecode states like uses_kill, so loading of the +// corresponding tgsi dump and translating it may be required to init those +// states before loading bytecode with this function + +// TODO parse and init exact ngpr in bytecode struct, though it worked so far + +static void r600_debug_load_bytecode(struct r600_pipe_shader *ps, + const char *filename) { + +#define TSIZE 65536 + + FILE *f; + char text[TSIZE]; + + f = fopen(filename, "r"); + assert(f); + + fgets(text, TSIZE, f); + if (strncmp(text,"===== SHADER", 12)) + abort(); + + fgets(text, TSIZE, f); + uint32_t a, u, dw; + lbc_num(text+6, 10, &dw); + + struct r600_bytecode *bc = &ps->shader.bc; + printf("load_bytecode: shader size (dw): %u\n", dw); + + if (dw > bc->ndw) { + free(bc->bytecode); + bc->bytecode = malloc(dw * 4); + } + + bc->ndw = dw; + memset(bc->bytecode, 0, dw * 4); + + while (!feof(f)) { + fgets(text, TSIZE, f); + printf(text); + + if (!strncmp(text, "===== SHADER_END", 16)) + break; + + char *p = lbc_num(text, 10, &a); + + p += 2; + while (lbc_ishex32(p)) { + p = lbc_num(p, 16, &u); + p++; + + printf(" [%u] = %08X\n", a, u); + bc->bytecode[a] = u; + a++; + } + } + + fclose(f); + + printf("bytecode loaded...\n"); +} + +// check and create fake context if required. +// R600_FAKE environment variable is used to enable it and choose chip class: +// R600_FAKE=6 means r6xx, =7 -r7xx, =8 - eg, 9 - cayman. +// exact chip family for each class is hardcoded below. +int r600_debug_check_fake_ctx(struct r600_context *rctx) { + + if (!rctx->fake_context) { + + static int fake_class = -1; + + if (fake_class == -1) + fake_class = debug_get_num_option("R600_FAKE", 0); + + if (fake_class) { + rctx->fake_context = calloc(1, sizeof(struct r600_context)); + rctx->fake_context->screen = malloc(sizeof(struct r600_screen)); + memcpy(rctx->fake_context->screen, rctx->screen, + sizeof(struct r600_screen)); + + switch (fake_class) { + case 6: + rctx->fake_context->chip_class = R600; + rctx->fake_context->family = CHIP_R600; + break; + case 7: + rctx->fake_context->chip_class = R700; + rctx->fake_context->family = CHIP_RV770; + break; + case 8: + rctx->fake_context->chip_class = EVERGREEN; + rctx->fake_context->family = CHIP_REDWOOD; + break; + case 9: + rctx->fake_context->chip_class = CAYMAN; + rctx->fake_context->family = CHIP_CAYMAN; + break; + default: + abort(); + } + + rctx->fake_context->screen->chip_class = + rctx->fake_context->chip_class; + rctx->fake_context->screen->family = rctx->fake_context->family; + + rctx->fake_context->isa = calloc(1, sizeof(struct r600_isa)); + if (!rctx->fake_context->isa || + r600_isa_init(rctx->fake_context, rctx->fake_context->isa)) + abort(); + } + } + + return rctx->fake_context != 0; +} + int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, struct r600_shader_key key) @@ -143,6 +346,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx, unsigned use_sb = rctx->screen->debug_flags & DBG_SB; unsigned sb_disasm = use_sb || (rctx->screen->debug_flags & DBG_SB_DISASM); +// r600_debug_load_tgsi(sel, "/home/vg/tgsi_shader_dump"); + shader->shader.bc.isa = rctx->isa; if (dump) { @@ -153,6 +358,49 @@ int r600_pipe_shader_create(struct pipe_context *ctx, r600_dump_streamout(&sel->so); } } + + if (dump && r600_debug_check_fake_ctx(rctx)) { + + struct r600_pipe_shader fake_shader; + memcpy(&fake_shader, shader, sizeof(fake_shader)); + + fake_shader.shader.bc.isa = rctx->fake_context->isa; + + r = r600_shader_from_tgsi(rctx->fake_context->screen, + &fake_shader, key); + if (r) { + R600_ERR("fakectx: translation from TGSI failed !\n"); + return r; + } + + if (!fake_shader.shader.bc.bytecode) { + r = r600_bytecode_build(&fake_shader.shader.bc); + if (r) { + R600_ERR("fakectx: building bytecode failed !\n"); + return r; + } + } + +// r600_debug_load_bytecode(&fake_shader, "/home/vg/bytecode_dump"); + + if (dump || use_sb) { + r = r600_sb_bytecode_process(rctx->fake_context, + &fake_shader.shader.bc, + &fake_shader.shader, + dump, use_sb); + if (r) { + R600_ERR("fakectx: building optimized bytecode failed !\n"); + return r; + } + } + + } + + // if r600_debug_load_tgsi was used above, further execution + // with unexpected shader probably doesn't make sense. + + // abort(); + r = r600_shader_from_tgsi(rctx->screen, shader, key); if (r) { R600_ERR("translation from TGSI failed !\n"); @@ -169,6 +417,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, R600_ERR("building bytecode failed !\n"); return r; } + } if (dump && !sb_disasm) { @@ -221,6 +470,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, default: return -EINVAL; } +// abort(); return 0; } diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp index d907508eb2..c69247a756 100644 --- a/src/gallium/drivers/r600/sb/sb_core.cpp +++ b/src/gallium/drivers/r600/sb/sb_core.cpp @@ -77,7 +77,8 @@ void r600_sb_context_destroy(void * sctx) { sb_context *ctx = static_cast<sb_context*>(sctx); if (sb_context::dump_stat) { - sblog << "\ncontext src stats: "; + sblog << "\ncontext stats for: " << ctx->get_hw_chip_name() << "\n"; + sblog << "context src stats: "; ctx->src_stats.dump(); sblog << "context opt stats: "; ctx->opt_stats.dump(); @@ -268,6 +269,7 @@ int r600_sb_bytecode_process(struct r600_context *rctx, sh->opt_stats.ndw = bc->ndw; sh->collect_stats(true); + sblog << "shader stats for " << ctx->get_hw_chip_name() << "\n"; sblog << "src stats: "; sh->src_stats.dump(); sblog << "opt stats: "; |