summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2015-10-13 15:30:03 -0700
committerKenneth Graunke <kenneth@whitecape.org>2015-11-05 15:26:07 -0800
commit8dcf807cb43383590ba193c7ff20b8a98e4a9f65 (patch)
treee9c05a2c3895e154905e993eddd0f8fd9191c6dc
parent5ae37ae6151623303300047d7465d199df8199a4 (diff)
i965: Fix scalar VS float[] and vec2[] output arrays.
The scalar VS backend has never handled float[] and vec2[] outputs correctly (my original code was broken). Outputs need to be padded out to vec4 slots. In fs_visitor::nir_setup_outputs(), we tried to process each vec4 slot by looping from 0 to ALIGN(type_size_scalar(type), 4) / 4. However, this is wrong: type_size_scalar() for a float[2] would return 2, or for vec2[2] it would return 4. This looked like a single slot, even though in reality each array element would be stored in separate vec4 slots. Because of this bug, outputs[] and output_components[] would not get initialized for the second element's VARYING_SLOT, which meant emit_urb_writes() would skip writing them. Nothing used those values, and dead code elimination threw a party. To fix this, we introduce a new type_size_vec4_times_4() function which pads array elements correctly, but still counts in scalar components, generating correct indices in store_output intrinsics. Normally, varying packing avoids this problem by turning varyings into vec4s. So this doesn't actually fix any Piglit or dEQP tests today. However, if varying packing is disabled, things would be broken. Tessellation shaders can't use varying packing, so this fixes various tcs-input Piglit tests on a branch of mine. v2: Shorten the implementation of type_size_4x to a single line (caught by Connor Abbott), and rename it to type_size_vec4_times_4() (renaming suggested by Jason Ekstrand). Use type_size_vec4 rather than using type_size_vec4_times_4 and then dividing by 4. Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp13
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h1
4 files changed, 17 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 629fbbdf01..ad94fa479e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -514,6 +514,19 @@ type_size_scalar(const struct glsl_type *type)
}
/**
+ * Returns the number of scalar components needed to store type, assuming
+ * that vectors are padded out to vec4.
+ *
+ * This has the packing rules of type_size_vec4(), but counts components
+ * similar to type_size_scalar().
+ */
+extern "C" int
+type_size_vec4_times_4(const struct glsl_type *type)
+{
+ return 4 * type_size_vec4(type);
+}
+
+/**
* Create a MOV to read the timestamp register.
*
* The caller is responsible for emitting the MOV. The return value is
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index b6f4c52c50..261518605b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -104,7 +104,7 @@ fs_visitor::nir_setup_outputs()
switch (stage) {
case MESA_SHADER_VERTEX:
case MESA_SHADER_GEOMETRY:
- for (unsigned int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
+ for (int i = 0; i < type_size_vec4(var->type); i++) {
int output = var->data.location + i;
this->outputs[output] = offset(reg, bld, 4 * i);
this->output_components[output] = vector_elements;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index a7a5eb511c..dece208233 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -150,7 +150,8 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
case MESA_SHADER_GEOMETRY:
if (is_scalar) {
nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
- type_size_scalar);
+ type_size_vec4_times_4);
+ nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
} else {
nir_foreach_variable(var, &nir->outputs)
var->data.driver_location = var->data.location;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 6a2dfc9bbb..29baebf0cc 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -277,6 +277,7 @@ bool brw_cs_precompile(struct gl_context *ctx,
int type_size_scalar(const struct glsl_type *type);
int type_size_vec4(const struct glsl_type *type);
+int type_size_vec4_times_4(const struct glsl_type *type);
bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage);