diff options
author | Connor Abbott <cwabbott0@gmail.com> | 2019-08-16 12:46:27 +0200 |
---|---|---|
committer | Connor Abbott <cwabbott0@gmail.com> | 2019-08-18 15:15:45 +0200 |
commit | c550d367a747472ee71ed4c99e210174730aa82b (patch) | |
tree | c142bc645b52f31c139272ef7bf4846231376f78 | |
parent | 0e394cda0db60ce8b584aac37913b793847ad518 (diff) |
ac/nir: Fix store_scratch with a non-full writemask
By adding one more helper to ac_llvm_build, we can also easily keep
vector stores together.
Fixes the
tests/spec/glsl-1.30/execution/fs-large-local-array-vec4.shader_test
piglit test.
Fixes: 74470baebbd ("ac/nir: Lower large indirect variables to scratch")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 16 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 7 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 24 |
3 files changed, 42 insertions, 5 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 24970769b874..823bf34acdbc 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -626,6 +626,22 @@ ac_build_expand(struct ac_llvm_context *ctx, return ac_build_gather_values(ctx, chan, dst_channels); } +/* Extract components [start, start + channels) from a vector. + */ +LLVMValueRef +ac_extract_components(struct ac_llvm_context *ctx, + LLVMValueRef value, + unsigned start, + unsigned channels) +{ + LLVMValueRef chan[channels]; + + for (unsigned i = 0; i < channels; i++) + chan[i] = ac_llvm_extract_elem(ctx, value, i + start); + + return ac_build_gather_values(ctx, chan, channels); +} + /* Expand a scalar or vector to <4 x type> by filling the remaining channels * with undef. Extract at most num_channels components from the input. */ diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 082201fb048f..6848a7ca082f 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -190,6 +190,13 @@ LLVMValueRef ac_build_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, unsigned value_count); + +LLVMValueRef +ac_extract_components(struct ac_llvm_context *ctx, + LLVMValueRef value, + unsigned start, + unsigned channels); + LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned num_channels); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 9b59c82f385f..b981d4cc897d 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3637,13 +3637,27 @@ static void visit_intrinsic(struct ac_nir_context *ctx, offset); LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size); - LLVMTypeRef vec_type = - instr->src[0].ssa->num_components == 1 ? 
comp_type : - LLVMVectorType(comp_type, instr->src[0].ssa->num_components); unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, - LLVMPointerType(vec_type, addr_space), ""); - LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr); + LLVMPointerType(comp_type, addr_space), ""); + LLVMValueRef src = get_src(ctx, instr->src[0]); + unsigned wrmask = nir_intrinsic_write_mask(instr); + while (wrmask) { + int start, count; + u_bit_scan_consecutive_range(&wrmask, &start, &count); + + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false); + LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, ""); + LLVMTypeRef vec_type = + count == 1 ? comp_type : LLVMVectorType(comp_type, count); + offset_ptr = LLVMBuildBitCast(ctx->ac.builder, + offset_ptr, + LLVMPointerType(vec_type, addr_space), + ""); + LLVMValueRef offset_src = + ac_extract_components(&ctx->ac, src, start, count); + LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr); + } break; } default: |