summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorConnor Abbott <cwabbott0@gmail.com>2019-08-16 12:46:27 +0200
committerConnor Abbott <cwabbott0@gmail.com>2019-08-18 15:15:45 +0200
commitc550d367a747472ee71ed4c99e210174730aa82b (patch)
treec142bc645b52f31c139272ef7bf4846231376f78
parent0e394cda0db60ce8b584aac37913b793847ad518 (diff)
ac/nir: Fix store_scratch with a non-full writemask
By adding one more helper to ac_llvm_build, we can also easily keep vector stores together. Fixes the tests/spec/glsl-1.30/execution/fs-large-local-array-vec4.shader_test piglit test. Fixes: 74470baebbd ("ac/nir: Lower large indirect variables to scratch") Reviewed-by: Marek Olšák <marek.olsak@amd.com>
-rw-r--r--src/amd/common/ac_llvm_build.c16
-rw-r--r--src/amd/common/ac_llvm_build.h7
-rw-r--r--src/amd/common/ac_nir_to_llvm.c24
3 files changed, 42 insertions, 5 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 24970769b874..823bf34acdbc 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -626,6 +626,22 @@ ac_build_expand(struct ac_llvm_context *ctx,
return ac_build_gather_values(ctx, chan, dst_channels);
}
+/* Extract components [start, start + channels) of value, one element at a time,
+ * and regroup them via ac_build_gather_values(); no range check is done here. */
+LLVMValueRef
+ac_extract_components(struct ac_llvm_context *ctx,
+ LLVMValueRef value,
+ unsigned start,
+ unsigned channels)
+{
+ LLVMValueRef chan[channels];
+
+ for (unsigned i = 0; i < channels; i++)
+ chan[i] = ac_llvm_extract_elem(ctx, value, i + start);
+
+ return ac_build_gather_values(ctx, chan, channels);
+}
+
/* Expand a scalar or vector to <4 x type> by filling the remaining channels
* with undef. Extract at most num_channels components from the input.
*/
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 082201fb048f..6848a7ca082f 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -190,6 +190,13 @@ LLVMValueRef
ac_build_gather_values(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count);
+
+LLVMValueRef
+ac_extract_components(struct ac_llvm_context *ctx,
+ LLVMValueRef value,
+ unsigned start,
+ unsigned channels);
+
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
LLVMValueRef value,
unsigned num_channels);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9b59c82f385f..b981d4cc897d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3637,13 +3637,27 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
offset);
LLVMTypeRef comp_type =
LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
- LLVMTypeRef vec_type =
- instr->src[0].ssa->num_components == 1 ? comp_type :
- LLVMVectorType(comp_type, instr->src[0].ssa->num_components);
unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(vec_type, addr_space), "");
- LLVMBuildStore(ctx->ac.builder, get_src(ctx, instr->src[0]), ptr);
+ LLVMPointerType(comp_type, addr_space), "");
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+ unsigned wrmask = nir_intrinsic_write_mask(instr);
+ while (wrmask) {
+ int start, count;
+ u_bit_scan_consecutive_range(&wrmask, &start, &count);
+
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
+ LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
+ LLVMTypeRef vec_type =
+ count == 1 ? comp_type : LLVMVectorType(comp_type, count);
+ offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
+ offset_ptr,
+ LLVMPointerType(vec_type, addr_space),
+ "");
+ LLVMValueRef offset_src =
+ ac_extract_components(&ctx->ac, src, start, count);
+ LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
+ }
break;
}
default: