diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 70 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 6 |
3 files changed, 76 insertions, 5 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 43619dd329..d151ba4c5c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -98,10 +98,10 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx, static void si_build_ps_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key); -/* Ideally pass the sample mask input to the PS epilog as v13, which +/* Ideally pass the sample mask input to the PS epilog as v14, which * is its usual location, so that the shader doesn't have to add v_mov. */ -#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13 +#define PS_EPILOG_SAMPLEMASK_MIN_LOC 14 enum { CONST_ADDR_SPACE = 2, @@ -4553,6 +4553,7 @@ static void create_function(struct si_shader_context *ctx) shader->info.face_vgpr_index = 20; add_arg_assign_checked(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.ancillary, SI_PARAM_ANCILLARY); + shader->info.ancillary_vgpr_index = 21; add_arg_assign_checked(&fninfo, ARG_VGPR, ctx->f32, &ctx->abi.sample_coverage, SI_PARAM_SAMPLE_COVERAGE); add_arg_checked(&fninfo, ARG_VGPR, ctx->i32, SI_PARAM_POS_FIXED_PT); @@ -4624,6 +4625,7 @@ static void create_function(struct si_shader_context *ctx) S_0286D0_LINEAR_CENTER_ENA(1) | S_0286D0_LINEAR_CENTROID_ENA(1) | S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_ANCILLARY_ENA(1) | S_0286D0_POS_FIXED_PT_ENA(1)); } @@ -5830,6 +5832,7 @@ static void si_get_ps_prolog_key(struct si_shader *shader, key->ps_prolog.states.force_linear_center_interp || key->ps_prolog.states.bc_optimize_for_persp || key->ps_prolog.states.bc_optimize_for_linear); + key->ps_prolog.ancillary_vgpr_index = shader->info.ancillary_vgpr_index; if (info->colors_read) { unsigned *color = shader->selector->color_attr_index; @@ -5939,7 +5942,8 @@ static bool si_need_ps_prolog(const union si_shader_part_key *key) key->ps_prolog.states.force_linear_center_interp || key->ps_prolog.states.bc_optimize_for_persp || key->ps_prolog.states.bc_optimize_for_linear || - key->ps_prolog.states.poly_stipple; + key->ps_prolog.states.poly_stipple || + key->ps_prolog.states.samplemask_log_ps_iter; } /** @@ -6583,6 +6587,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, if (ctx.type == PIPE_SHADER_FRAGMENT) { shader->info.num_input_vgprs = 0; shader->info.face_vgpr_index = -1; + shader->info.ancillary_vgpr_index = -1; if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr)) shader->info.num_input_vgprs += 2; @@ -6612,8 +6617,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, shader->info.face_vgpr_index = shader->info.num_input_vgprs; shader->info.num_input_vgprs += 1; } - if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) + if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) { + shader->info.ancillary_vgpr_index = shader->info.num_input_vgprs; shader->info.num_input_vgprs += 1; + } if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr)) shader->info.num_input_vgprs += 1; if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr)) @@ -7285,6 +7292,54 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx, } } + /* Section 15.2.2 (Shader Inputs) of the OpenGL 4.5 (Core Profile) spec + * says: + * + * "When per-sample shading is active due to the use of a fragment + * input qualified by sample or due to the use of the gl_SampleID + * or gl_SamplePosition variables, only the bit for the current + * sample is set in gl_SampleMaskIn. When state specifies multiple + * fragment shader invocations for a given fragment, the sample + * mask for any single fragment shader invocation may specify a + * subset of the covered samples for the fragment. In this case, + * the bit corresponding to each covered sample will be set in + * exactly one fragment shader invocation." + * + * The samplemask loaded by hardware is always the coverage of the + * entire pixel/fragment, so mask bits out based on the sample ID. + */ + if (key->ps_prolog.states.samplemask_log_ps_iter) { + /* The bit pattern matches that used by fixed function fragment + * processing. */ + static const uint16_t ps_iter_masks[] = { + 0xffff, /* not used */ + 0x5555, + 0x1111, + 0x0101, + 0x0001, + }; + assert(key->ps_prolog.states.samplemask_log_ps_iter < ARRAY_SIZE(ps_iter_masks)); + + uint32_t ps_iter_mask = ps_iter_masks[key->ps_prolog.states.samplemask_log_ps_iter]; + unsigned ancillary_vgpr = key->ps_prolog.num_input_sgprs + + key->ps_prolog.ancillary_vgpr_index; + LLVMValueRef sampleid = unpack_param(ctx, ancillary_vgpr, 8, 4); + LLVMValueRef samplemask = LLVMGetParam(func, ancillary_vgpr + 1); + + samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->i32, ""); + samplemask = LLVMBuildAnd( + gallivm->builder, + samplemask, + LLVMBuildShl(gallivm->builder, + LLVMConstInt(ctx->i32, ps_iter_mask, false), + sampleid, ""), + ""); + samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->f32, ""); + + ret = LLVMBuildInsertValue(gallivm->builder, ret, samplemask, + ancillary_vgpr + 1, ""); + } + /* Tell LLVM to insert WQM instruction sequence when needed. */ if (key->ps_prolog.wqm) { LLVMAddTargetDependentFunctionAttr(func, @@ -7481,6 +7536,12 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr)); } + /* Samplemask fixup requires the sample ID. */ + if (shader->key.part.ps.prolog.samplemask_log_ps_iter) { + shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1); + assert(G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)); + } + /* The sample mask input is always enabled, because the API shader always * passes it through to the epilog. Disable it here if it's unused. */ @@ -7565,6 +7626,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, shader->info.num_input_sgprs = mainp->info.num_input_sgprs; shader->info.num_input_vgprs = mainp->info.num_input_vgprs; shader->info.face_vgpr_index = mainp->info.face_vgpr_index; + shader->info.ancillary_vgpr_index = mainp->info.ancillary_vgpr_index; memcpy(shader->info.vs_output_param_offset, mainp->info.vs_output_param_offset, sizeof(mainp->info.vs_output_param_offset)); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 4592ac551c..641f49cbac 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -424,6 +424,7 @@ struct si_ps_prolog_bits { unsigned force_linear_center_interp:1; unsigned bc_optimize_for_persp:1; unsigned bc_optimize_for_linear:1; + unsigned samplemask_log_ps_iter:3; }; /* Common PS bits between the shader key and the epilog key. */ @@ -465,6 +466,7 @@ union si_shader_part_key { unsigned colors_read:8; /* color input components read */ unsigned num_interp_inputs:5; /* BCOLOR is at this location */ unsigned face_vgpr_index:5; + unsigned ancillary_vgpr_index:5; unsigned wqm:1; char color_attr_index[2]; char color_interp_vgpr_index[2]; /* -1 == constant */ @@ -557,7 +559,8 @@ struct si_shader_info { ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; ubyte num_input_sgprs; ubyte num_input_vgprs; - char face_vgpr_index; + signed char face_vgpr_index; + signed char ancillary_vgpr_index; bool uses_instanceid; ubyte nr_pos_exports; ubyte nr_param_exports; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ac8b65e398..6d87cab988 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1425,6 +1425,12 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, sctx->framebuffer.nr_samples <= 1; key->part.ps.epilog.clamp_color = rs->clamp_fragment_color; + if (sctx->ps_iter_samples > 1 && + sel->info.reads_samplemask) { + key->part.ps.prolog.samplemask_log_ps_iter = + util_logbase2(util_next_power_of_two(sctx->ps_iter_samples)); + } + if (rs->force_persample_interp && rs->multisample_enable && sctx->framebuffer.nr_samples > 1 && |