diff options
author | Jordan Justen <jordan.l.justen@intel.com> | 2022-12-08 16:31:20 -0800 |
---|---|---|
committer | Jordan Justen <jordan.l.justen@intel.com> | 2022-12-15 00:54:59 -0800 |
commit | 4ac22783a834893c224032b650c8651a6b92f79b (patch) | |
tree | 0434211e064961c3707d1f424e0a847b25d4abc7 | |
parent | 51d5431a29a810a210c638cd75ffbe5bf0b4286f (diff) |
intel/common/intel_genX_state.h: Add intel_set_ps_dispatch_state() [genx-ps-kernel-enable]
This replaces brw_fs_get_dispatch_enables(), which was added in
b9403b1c477 ("intel: factor out dispatch PS enabling logic"), but that
function will not work well for future changes to 3DSTATE_PS.
So, instead, this moves the related code into a "genX" file which can
directly update 3DSTATE_PS for the given platform.
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
-rw-r--r-- | src/gallium/drivers/crocus/crocus_state.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_state.c | 8 | ||||
-rw-r--r-- | src/intel/blorp/blorp_genX_exec.h | 8 | ||||
-rw-r--r-- | src/intel/common/intel_genX_state.h | 70 | ||||
-rw-r--r-- | src/intel/compiler/brw_compiler.h | 59 | ||||
-rw-r--r-- | src/intel/vulkan/genX_pipeline.c | 8 | ||||
-rw-r--r-- | src/intel/vulkan_hasvk/genX_pipeline.c | 8 |
7 files changed, 86 insertions, 84 deletions
diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c index 749e714c4df..626070ed242 100644 --- a/src/gallium/drivers/crocus/crocus_state.c +++ b/src/gallium/drivers/crocus/crocus_state.c @@ -108,6 +108,7 @@ #include "crocus_resource.h" #include "crocus_genx_macros.h" +#include "intel/common/intel_genX_state.h" #include "intel/common/intel_guardband.h" #include "main/macros.h" /* UNCLAMPED_* */ @@ -6446,11 +6447,9 @@ crocus_upload_dirty_render_state(struct crocus_context *ice, */ ps.VectorMaskEnable = GFX_VER >= 8 && wm_prog_data->uses_vmask; - brw_fs_get_dispatch_enables(&batch->screen->devinfo, wm_prog_data, - ice->state.framebuffer.samples, - &ps._8PixelDispatchEnable, - &ps._16PixelDispatchEnable, - &ps._32PixelDispatchEnable); + intel_set_ps_dispatch_state(&ps, &batch->screen->devinfo, + wm_prog_data, + ice->state.framebuffer.samples); ps.DispatchGRFStartRegisterForConstantSetupData0 = brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 9b623c882fb..90654b5472d 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -109,6 +109,7 @@ #include "iris_utrace.h" #include "iris_genx_macros.h" +#include "intel/common/intel_genX_state.h" #include "intel/common/intel_guardband.h" #include "intel/common/intel_pixel_hash.h" @@ -6228,11 +6229,8 @@ iris_upload_dirty_render_state(struct iris_context *ice, uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0}; _iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) { - brw_fs_get_dispatch_enables(&screen->devinfo, wm_prog_data, - cso_fb->samples, - &ps._8PixelDispatchEnable, - &ps._16PixelDispatchEnable, - &ps._32PixelDispatchEnable); + intel_set_ps_dispatch_state(&ps, &batch->screen->devinfo, + wm_prog_data, cso_fb->samples); ps.DispatchGRFStartRegisterForConstantSetupData0 = brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); 
diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 9df890533eb..d7b3cb99bcd 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -141,6 +141,7 @@ _blorp_combine_address(struct blorp_batch *batch, void *location, #define __gen_combine_address _blorp_combine_address #include "genxml/genX_pack.h" +#include "common/intel_genX_state.h" #define _blorp_cmd_length(cmd) cmd ## _length #define _blorp_cmd_length_bias(cmd) cmd ## _length_bias @@ -859,11 +860,8 @@ blorp_emit_ps_config(struct blorp_batch *batch, ps.SamplerCount = 0; if (prog_data) { - brw_fs_get_dispatch_enables(devinfo, prog_data, - params->num_samples, - &ps._8PixelDispatchEnable, - &ps._16PixelDispatchEnable, - &ps._32PixelDispatchEnable); + intel_set_ps_dispatch_state(&ps, devinfo, prog_data, + params->num_samples); ps.DispatchGRFStartRegisterForConstantSetupData0 = brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); diff --git a/src/intel/common/intel_genX_state.h b/src/intel/common/intel_genX_state.h index 3863cbb6a3b..08e7420264e 100644 --- a/src/intel/common/intel_genX_state.h +++ b/src/intel/common/intel_genX_state.h @@ -28,10 +28,80 @@ #error This file should only be included by genX files. 
#endif +#include <stdbool.h> + +#include "dev/intel_device_info.h" +#include "genxml/gen_macros.h" + #ifdef __cplusplus extern "C" { #endif +#if GFX_VER >= 7 + +static inline void +intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps, + const struct intel_device_info *devinfo, + const struct brw_wm_prog_data *prog_data, + unsigned rasterization_samples) +{ + assert(rasterization_samples != 0); + + bool enable_8 = prog_data->dispatch_8; + bool enable_16 = prog_data->dispatch_16; + bool enable_32 = prog_data->dispatch_32; + + if (prog_data->persample_dispatch) { + /* TGL PRMs, Volume 2d: Command Reference: Structures: + * 3DSTATE_PS_BODY::32 Pixel Dispatch Enable: + * + * "Must not be enabled when dispatch rate is sample AND NUM_MULTISAMPLES > 1." + */ + if (GFX_VER >= 12 && rasterization_samples > 1) + enable_32 = false; + + /* Starting with SandyBridge (where we first get MSAA), the different + * pixel dispatch combinations are grouped into classifications A + * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1). On most hardware + * generations, the only configurations supporting persample dispatch + * are those in which only one dispatch width is enabled. + * + * The Gfx12 hardware spec has a similar dispatch grouping table, but + * the following conflicting restriction applies (from the page on + * "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader: + * + * "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also + * enabled." + */ + if (enable_32 || enable_16) + enable_8 = false; + if (GFX_VER < 12 && enable_32) + enable_16 = false; + } + + /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say: + * + * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, + * SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch + * mode." + * + * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8. 
+ */ + if (GFX_VER >= 9 && rasterization_samples == 16 && + !prog_data->persample_dispatch) { + assert(enable_8 || enable_16); + enable_32 = false; + } + + assert(enable_8 || enable_16 || enable_32); + + ps->_8PixelDispatchEnable = enable_8; + ps->_16PixelDispatchEnable = enable_16; + ps->_32PixelDispatchEnable = enable_32; +} + +#endif + #ifdef __cplusplus } #endif diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index ea84e551eff..b59b84e558d 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -957,65 +957,6 @@ brw_fs_simd_width_for_ksp(unsigned ksp_idx, bool simd8_enabled, } } -static inline void -brw_fs_get_dispatch_enables(const struct intel_device_info *devinfo, - const struct brw_wm_prog_data *prog_data, - unsigned rasterization_samples, - bool *enable_8, - bool *enable_16, - bool *enable_32) -{ - assert(rasterization_samples != 0); - - *enable_8 = prog_data->dispatch_8; - *enable_16 = prog_data->dispatch_16; - *enable_32 = prog_data->dispatch_32; - - if (prog_data->persample_dispatch) { - /* TGL PRMs, Volume 2d: Command Reference: Structures: - * 3DSTATE_PS_BODY::32 Pixel Dispatch Enable: - * - * "Must not be enabled when dispatch rate is sample AND NUM_MULTISAMPLES > 1." - */ - if (devinfo->ver >= 12 && rasterization_samples > 1) - *enable_32 = false; - - /* Starting with SandyBridge (where we first get MSAA), the different - * pixel dispatch combinations are grouped into classifications A - * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1). On most hardware - * generations, the only configurations supporting persample dispatch - * are those in which only one dispatch width is enabled. - * - * The Gfx12 hardware spec has a similar dispatch grouping table, but - * the following conflicting restriction applies (from the page on - * "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader: - * - * "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also - * enabled." 
- */ - if (*enable_32 || *enable_16) - *enable_8 = false; - if (devinfo->ver < 12 && *enable_32) - *enable_16 = false; - } - - /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say: - * - * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, - * SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch - * mode." - * - * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8. - */ - if (devinfo->ver >= 9 && rasterization_samples == 16 && - !prog_data->persample_dispatch) { - assert(*enable_8 || *enable_16); - *enable_32 = false; - } - - assert(*enable_8 || *enable_16 || *enable_32); -} - #define brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx) \ brw_fs_simd_width_for_ksp((ksp_idx), (wm_state)._8PixelDispatchEnable, \ (wm_state)._16PixelDispatchEnable, \ diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 2ed9193c6ae..62e4acdc929 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -27,6 +27,7 @@ #include "genxml/genX_pack.h" #include "genxml/genX_rt_pack.h" +#include "common/intel_genX_state.h" #include "common/intel_l3_config.h" #include "common/intel_sample_positions.h" #include "nir/nir_xfb_info.h" @@ -1522,11 +1523,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) { - brw_fs_get_dispatch_enables(devinfo, wm_prog_data, - ms != NULL ? ms->rasterization_samples : 1, - &ps._8PixelDispatchEnable, - &ps._16PixelDispatchEnable, - &ps._32PixelDispatchEnable); + intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data, + ms != NULL ? 
ms->rasterization_samples : 1); ps.KernelStartPointer0 = fs_bin->kernel.offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); diff --git a/src/intel/vulkan_hasvk/genX_pipeline.c b/src/intel/vulkan_hasvk/genX_pipeline.c index 76c5249ac00..e7d066f071e 100644 --- a/src/intel/vulkan_hasvk/genX_pipeline.c +++ b/src/intel/vulkan_hasvk/genX_pipeline.c @@ -27,6 +27,7 @@ #include "genxml/genX_pack.h" #include "genxml/genX_rt_pack.h" +#include "common/intel_genX_state.h" #include "common/intel_l3_config.h" #include "common/intel_sample_positions.h" #include "nir/nir_xfb_info.h" @@ -1677,11 +1678,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, #endif anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) { - brw_fs_get_dispatch_enables(devinfo, wm_prog_data, - ms != NULL ? ms->rasterization_samples : 1, - &ps._8PixelDispatchEnable, - &ps._16PixelDispatchEnable, - &ps._32PixelDispatchEnable); + intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data, + ms != NULL ? ms->rasterization_samples : 1); ps.KernelStartPointer0 = fs_bin->kernel.offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); |