summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>2023-05-10 08:16:59 +0300
committerMarge Bot <emma+marge@anholt.net>2023-05-11 08:01:46 +0000
commitb4b17f8aaac83fa5ff9532533697fa643a8c5741 (patch)
tree752beab9674175892818a9f2fd40faf18a94c739
parent728e316864712e8714bfc641d5c3a6efa036edbb (diff)
Revert "intel/compiler: make uses_pos_offset a tri-state"
This reverts commit 5489033fa8568ecacafe32ceab36f89f2e14f3dc. The problem I was trying to address is that we were programming the 3DSTATE_PS::PositionXYOffsetSelect bit differently with GPL (CENTROID) than without (NONE). I failed to understand that this bit also impacts the thread payload layout. GPL fragment shaders don't know ahead of time if pos_offset is going to be used. It'll be choosen at runtime base on push constant bits. So we need to program this bit different just to have a payload matching the compiled shader code. This fixes the freedoom replay with GPL FS shader in SIMD32. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22938>
-rw-r--r--src/gallium/drivers/crocus/crocus_state.c12
-rw-r--r--src/gallium/drivers/iris/iris_state.c4
-rw-r--r--src/intel/compiler/brw_compiler.h22
-rw-r--r--src/intel/compiler/brw_fs.cpp16
-rw-r--r--src/intel/vulkan/genX_pipeline.c6
-rw-r--r--src/intel/vulkan_hasvk/genX_pipeline.c6
6 files changed, 17 insertions, 49 deletions
diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c
index 0bc05a2d7de..28fa9e3fd65 100644
--- a/src/gallium/drivers/crocus/crocus_state.c
+++ b/src/gallium/drivers/crocus/crocus_state.c
@@ -6505,9 +6505,7 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
* look useful at the moment. We might need this in future.
*/
ps.PositionXYOffsetSelect =
- brw_wm_prog_data_uses_position_xy_offset(wm_prog_data,
- 0 /* msaa_flags */) ?
- POSOFFSET_SAMPLE : POSOFFSET_NONE;
+ wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
if (wm_prog_data->base.total_scratch) {
struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch, MESA_SHADER_FRAGMENT);
@@ -7274,10 +7272,10 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
* We only require XY sample offsets. So, this recommendation doesn't
* look useful at the moment. We might need this in future.
*/
- wm.PositionXYOffsetSelect =
- brw_wm_prog_data_uses_position_xy_offset(wm_prog_data,
- 0 /* msaa_flags */) ?
- POSOFFSET_SAMPLE : POSOFFSET_NONE;
+ if (wm_prog_data->uses_pos_offset)
+ wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
+ else
+ wm.PositionXYOffsetSelect = POSOFFSET_NONE;
#endif
wm.LineStippleEnable = cso->cso.line_stipple_enable;
wm.PolygonStippleEnable = cso->cso.poly_stipple_enable;
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 748bea90986..ea0271f407f 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -4936,9 +4936,7 @@ iris_store_fs_state(const struct intel_device_info *devinfo,
* look useful at the moment. We might need this in future.
*/
ps.PositionXYOffsetSelect =
- brw_wm_prog_data_uses_position_xy_offset(wm_prog_data,
- 0 /* msaa_flags */) ?
- POSOFFSET_SAMPLE : POSOFFSET_NONE;
+ wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
if (prog_data->total_scratch) {
INIT_THREAD_SCRATCH_SIZE(ps);
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 7acffd62742..f456acc35ae 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -923,8 +923,7 @@ struct brw_wm_prog_data {
bool dispatch_16;
bool dispatch_32;
bool dual_src_blend;
- enum brw_sometimes uses_pos_offset;
- bool read_pos_offset_input;
+ bool uses_pos_offset;
bool uses_omask;
bool uses_kill;
bool uses_src_depth;
@@ -1187,25 +1186,6 @@ brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data,
return prog_data->coarse_pixel_dispatch;
}
-static inline bool
-brw_wm_prog_data_uses_position_xy_offset(const struct brw_wm_prog_data *prog_data,
- enum brw_wm_msaa_flags pushed_msaa_flags)
-{
- bool per_sample;
- if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) {
- per_sample = (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP) != 0;
- } else {
- assert(prog_data->persample_dispatch == BRW_ALWAYS ||
- prog_data->persample_dispatch == BRW_NEVER);
- per_sample = prog_data->persample_dispatch == BRW_ALWAYS;
- }
-
- if (!per_sample)
- return false;
-
- return prog_data->read_pos_offset_input;
-}
-
struct brw_push_const_block {
unsigned dwords; /* Dword count, not reg aligned */
unsigned regs;
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 0a0ed57fd9a..59637d36fb2 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -7365,16 +7365,12 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
* per-sample dispatch. If we need gl_SamplePosition and we don't have
* persample dispatch, we hard-code it to 0.5.
*/
- prog_data->read_pos_offset_input =
- BITSET_TEST(shader->info.system_values_read,
- SYSTEM_VALUE_SAMPLE_POS) ||
- BITSET_TEST(shader->info.system_values_read,
- SYSTEM_VALUE_SAMPLE_POS_OR_CENTER);
-
- if (prog_data->read_pos_offset_input)
- prog_data->uses_pos_offset = prog_data->persample_dispatch;
- else
- prog_data->uses_pos_offset = BRW_NEVER;
+ prog_data->uses_pos_offset =
+ prog_data->persample_dispatch != BRW_NEVER &&
+ (BITSET_TEST(shader->info.system_values_read,
+ SYSTEM_VALUE_SAMPLE_POS) ||
+ BITSET_TEST(shader->info.system_values_read,
+ SYSTEM_VALUE_SAMPLE_POS_OR_CENTER));
}
prog_data->has_render_target_reads = shader->info.outputs_read != 0ull;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 1e5a76da2c2..5f4a56f3337 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1535,10 +1535,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
wm_prog_data->base.ubo_ranges[0].length;
ps.PositionXYOffsetSelect =
- !brw_wm_prog_data_uses_position_xy_offset(wm_prog_data,
- pipeline->fs_msaa_flags) ?
- POSOFFSET_NONE :
- (persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID);
+ !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
+ persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID;
ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
diff --git a/src/intel/vulkan_hasvk/genX_pipeline.c b/src/intel/vulkan_hasvk/genX_pipeline.c
index 9a8483af687..bd13d520686 100644
--- a/src/intel/vulkan_hasvk/genX_pipeline.c
+++ b/src/intel/vulkan_hasvk/genX_pipeline.c
@@ -1695,10 +1695,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
wm_prog_data->base.ubo_ranges[0].length;
- ps.PositionXYOffsetSelect =
- brw_wm_prog_data_uses_position_xy_offset(wm_prog_data,
- 0 /* msaa_flags */) ?
- POSOFFSET_SAMPLE : POSOFFSET_NONE;
+ ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
+ POSOFFSET_SAMPLE: POSOFFSET_NONE;
#if GFX_VER < 8
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;