diff options
author | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2016-03-10 18:12:44 -0500 |
---|---|---|
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2016-03-18 18:20:30 -0500 |
commit | 7e13cd3ab906c6c8e7658eb69fdc2fbf3c7168ae (patch) | |
tree | b8ec9109ff6c120dfc1e0e304beba9c4b604471a | |
parent | 76e96559f3328db69f87ac7e36f57a50baf24e7f (diff) |
radeonsi: Lower TGSI_OPCODE_MEMBAR down to LLVM op
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 9ef14bc53c..7c0dc93d00 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2713,6 +2713,35 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data); +/* Prevent optimizations (at least of memory accesses) across the current + * point in the program by emitting empty inline assembly that is marked as + * having side effects. + */ +static void emit_optimization_barrier(struct si_shader_context *ctx) +{ + LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder; + LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); + LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false); + LLVMBuildCall(builder, inlineasm, NULL, 0, ""); +} + +static void membar_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + + /* Since memoryBarrier only makes guarantees about atomics and + * coherent image accesses (which bypass TC L1), we do not need to emit + * any special cache handling here. + * + * We do have to prevent LLVM from re-ordering loads across + * the barrier though. + */ + emit_optimization_barrier(ctx); +} + static bool tgsi_is_array_sampler(unsigned target) { return target == TGSI_TEXTURE_1D_ARRAY || @@ -5315,6 +5344,8 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl; bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax"; + bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit; + bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy; bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy; bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy; |