diff options
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 43 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 45 |
3 files changed, 47 insertions, 43 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 2503608e26..9045df7a0d 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -36,6 +36,7 @@ #include "ac_exp_param.h" #include "util/bitscan.h" #include "util/macros.h" +#include "util/u_atomic.h" #include "sid.h" #include "shader_enums.h" @@ -200,6 +201,48 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize) } } +/* Prevent optimizations (at least of memory accesses) across the current + * point in the program by emitting empty inline assembly that is marked as + * having side effects. + * + * Optionally, a value can be passed through the inline assembly to prevent + * LLVM from hoisting calls to ReadNone functions. + */ +void +ac_build_optimization_barrier(struct ac_llvm_context *ctx, + LLVMValueRef *pvgpr) +{ + static int counter = 0; + + LLVMBuilderRef builder = ctx->builder; + char code[16]; + + snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter)); + + if (!pvgpr) { + LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); + LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false); + LLVMBuildCall(builder, inlineasm, NULL, 0, ""); + } else { + LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false); + LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false); + LLVMValueRef vgpr = *pvgpr; + LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr); + unsigned vgpr_size = ac_get_type_size(vgpr_type); + LLVMValueRef vgpr0; + + assert(vgpr_size % 4 == 0); + + vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), ""); + vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, ""); + vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, ""); + vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, ""); + vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, ""); + + *pvgpr = vgpr; + } +} + LLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values, diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index cbc938d5a0..798fcebe17 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -75,6 +75,8 @@ ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name, void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize); +void ac_build_optimization_barrier(struct ac_llvm_context *ctx, + LLVMValueRef *pvgpr); LLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1124947885..705629d1a2 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3463,47 +3463,6 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi, ctx->return_value = ret; } -/* Prevent optimizations (at least of memory accesses) across the current - * point in the program by emitting empty inline assembly that is marked as - * having side effects. - * - * Optionally, a value can be passed through the inline assembly to prevent - * LLVM from hoisting calls to ReadNone functions. - */ -static void emit_optimization_barrier(struct si_shader_context *ctx, - LLVMValueRef *pvgpr) -{ - static int counter = 0; - - LLVMBuilderRef builder = ctx->gallivm.builder; - char code[16]; - - snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter)); - - if (!pvgpr) { - LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); - LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false); - LLVMBuildCall(builder, inlineasm, NULL, 0, ""); - } else { - LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false); - LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false); - LLVMValueRef vgpr = *pvgpr; - LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr); - unsigned vgpr_size = ac_get_type_size(vgpr_type); - LLVMValueRef vgpr0; - - assert(vgpr_size % 4 == 0); - - vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), ""); - vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, ""); - vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, ""); - vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, ""); - vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, ""); - - *pvgpr = vgpr; - } -} - void si_emit_waitcnt(struct si_shader_context *ctx, unsigned simm16) { struct gallivm_state *gallivm = &ctx->gallivm; @@ -3801,7 +3760,7 @@ static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx, /* We currently have no other way to prevent LLVM from lifting the icmp * calls to a dominating basic block. */ - emit_optimization_barrier(ctx, &args[0]); + ac_build_optimization_barrier(&ctx->ac, &args[0]); if (LLVMTypeOf(args[0]) != ctx->i32) args[0] = LLVMBuildBitCast(gallivm->builder, args[0], ctx->i32, ""); @@ -3912,7 +3871,7 @@ static void read_lane_emit( /* We currently have no other way to prevent LLVM from lifting the icmp * calls to a dominating basic block. */ - emit_optimization_barrier(ctx, &emit_data->args[0]); + ac_build_optimization_barrier(&ctx->ac, &emit_data->args[0]); for (unsigned i = 0; i < emit_data->arg_count; ++i) { emit_data->args[i] = LLVMBuildBitCast(builder, emit_data->args[i], |