diff options
author | Kenneth Graunke <kenneth@whitecape.org> | 2015-05-15 10:03:42 -0700 |
---|---|---|
committer | Carl Worth <cworth@cworth.org> | 2015-06-11 11:13:29 -0700 |
commit | 7180a61d68ba467960f989bbcec58b6f02ac8212 (patch) | |
tree | 6000d800a75d39bcacdd75adbe50db38ca3521d5 | |
parent | 588d13f18dca2fb5754c0d62a6d8029230b17ef6 (diff) |
i965: Create a vue_maps_equal() helper. (ref: jenkins)
Currently, this just does the existing memcmp(). It will be optimized
shortly.
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
i965: Optimize VUE map comparisons.
struct brw_vue_map is 136 bytes; doing a single 8-byte comparison is
much cheaper and should work just as well.
Even if this changes in the future, the comparisons are now done by a
centralized helper function, so it should be easy to change.
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
stash
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs.c | 30 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 106 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs.c | 51 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vue_map.c | 61 |
5 files changed, 156 insertions, 94 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 2dcc23c5fc..918c079cd6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -528,6 +528,8 @@ void brw_compute_vue_map(const struct brw_device_info *devinfo,
                          struct brw_vue_map *vue_map,
                          GLbitfield64 slots_valid);
 
+GLbitfield64 brw_compute_output_vue_slots(const struct brw_context *brw,
+                                          const struct gl_program *prog);
 
 /**
  * Bitmask indicating which fragment shader inputs represent varyings (and
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 52c73031a3..90aca3282c 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -115,19 +115,8 @@ brw_codegen_gs_prog(struct brw_context *brw,
    c.prog_data.control_data_header_size_hwords =
       ALIGN(c.control_data_header_size_bits, 256) / 256;
 
-   GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
-
-   /* In order for legacy clipping to work, we need to populate the clip
-    * distance varying slots whenever clipping is enabled, even if the vertex
-    * shader doesn't write to gl_ClipDistance.
-    */
-   if (c.key.base.userclip_active) {
-      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
-      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
-   }
-
-   brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &c.prog_data.base.vue_map, outputs_written);
+   /* BRW_NEW_VUE_MAP_GEOM_OUT */
+   c.prog_data.base.vue_map = brw->vue_map_geom_out;
 
    /* Compute the output vertex size.
     *
@@ -297,7 +286,8 @@ brw_gs_state_dirty(struct brw_context *brw)
                           _NEW_TEXTURE,
                           BRW_NEW_GEOMETRY_PROGRAM |
                           BRW_NEW_TRANSFORM_FEEDBACK |
-                          BRW_NEW_VUE_MAP_VS);
+                          BRW_NEW_VUE_MAP_VS |
+                          BRW_NEW_VUE_MAP_GEOM_OUT);
 }
 
 static void
@@ -339,12 +329,6 @@ brw_upload_gs_prog(struct brw_context *brw)
       return;
 
    if (gp == NULL) {
-      /* No geometry shader. Vertex data just passes straight through. */
-      if (brw->ctx.NewDriverState & BRW_NEW_VUE_MAP_VS) {
-         brw->vue_map_geom_out = brw->vue_map_vs;
-         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-      }
-
       if (brw->gen == 6 &&
           (brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) {
          gen6_brw_upload_ff_gs_prog(brw);
@@ -371,12 +355,6 @@ brw_upload_gs_prog(struct brw_context *brw)
       (void)success;
    }
    brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
-
-   if (memcmp(&brw->gs.prog_data->base.vue_map, &brw->vue_map_geom_out,
-              sizeof(brw->vue_map_geom_out)) != 0) {
-      brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
-      brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-   }
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 84b0861aaa..6cf3deb046 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -652,6 +652,90 @@ check_and_emit_atom(struct brw_context *brw,
    }
 }
 
+static void
+select_shader_programs(struct brw_context *brw, enum brw_pipeline pipeline)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+
+   /* Compute is simple, handle it separately */
+   if (pipeline == BRW_COMPUTE_PIPELINE) {
+      if (brw->compute_program != ctx->ComputeProgram._Current) {
+         brw->compute_program = ctx->ComputeProgram._Current;
+         ctx->NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
+      }
+      return;
+   }
+
+   assert(pipeline == BRW_RENDER_PIPELINE);
+
+   const struct gl_program *progs[MESA_SHADER_STAGES] = {
+      [MESA_SHADER_VERTEX] = (struct gl_program *) ctx->VertexProgram._Current,
+      [MESA_SHADER_GEOMETRY] = (struct gl_program *) ctx->GeometryProgram._Current,
+      [MESA_SHADER_FRAGMENT] = (struct gl_program *) ctx->FragmentProgram._Current
+   };
+
+   const struct gl_program **brw_prog_ptrs[MESA_SHADER_STAGES] = {
+      [MESA_SHADER_VERTEX] = (const struct gl_program **) &brw->vertex_program,
+      [MESA_SHADER_GEOMETRY] = (const struct gl_program **) &brw->geometry_program,
+      [MESA_SHADER_FRAGMENT] = (const struct gl_program **) &brw->fragment_program
+   };
+
+   struct brw_vue_map *vue_maps[MESA_SHADER_FRAGMENT] = {
+      [MESA_SHADER_VERTEX] = &brw->vue_map_vs,
+      [MESA_SHADER_GEOMETRY] = &brw->vue_map_geom_out,
+   };
+
+   static const uint64_t brw_new_program_dirty_bit[MESA_SHADER_STAGES] = {
+      [MESA_SHADER_VERTEX] = BRW_NEW_VERTEX_PROGRAM,
+      [MESA_SHADER_GEOMETRY] = BRW_NEW_GEOMETRY_PROGRAM,
+      [MESA_SHADER_FRAGMENT] = BRW_NEW_FRAGMENT_PROGRAM,
+      [MESA_SHADER_COMPUTE] = BRW_NEW_COMPUTE_PROGRAM,
+   };
+
+   static const uint64_t brw_new_vue_map_dirty_bit[MESA_SHADER_FRAGMENT] = {
+      [MESA_SHADER_VERTEX] = BRW_NEW_VUE_MAP_VS,
+      [MESA_SHADER_GEOMETRY] = BRW_NEW_VUE_MAP_GEOM_OUT,
+   };
+
+   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
+      /* Update (e.g.) brw->vertex_program and flag BRW_NEW_VERTEX_PROGRAM. */
+      if (*brw_prog_ptrs[i] != progs[i]) {
+         *brw_prog_ptrs[i] = progs[i];
+         ctx->NewDriverState |= brw_new_program_dirty_bit[i];
+      }
+   }
+
+   /* Recompute the VUE maps for each stage, if necessary, and flag
+    * BRW_NEW_VUE_MAP_<stage>.
+    */
+   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+      /* See brw_compute_output_vue_slots */
+      if (!brw_state_dirty(brw, _NEW_TRANSFORM | _NEW_POINT | _NEW_POLYGON,
+                           brw_new_program_dirty_bit[i]))
+         continue;
+
+      GLbitfield64 outputs;
+      if (progs[i]) {
+         /* The program exists; compute its output layout. */
+         outputs = brw_compute_output_vue_slots(brw, progs[i]);
+      } else {
+         /* This stage is disabled; the previous active stage's outputs are
+          * passed through.
+          */
+         int prev_stage;
+         for (prev_stage = i - 1; !progs[prev_stage]; prev_stage--);
+         outputs = vue_maps[prev_stage]->slots_valid;
+      }
+
+      /* If the outputs changed, recompute the VUE map and flag the bit. */
+      if (vue_maps[i]->slots_valid != outputs) {
+         brw_compute_vue_map(devinfo, vue_maps[i], outputs);
+         ctx->NewDriverState |= brw_new_vue_map_dirty_bit[i];
+      }
+   }
+}
+
 static inline void
 brw_upload_pipeline_state(struct brw_context *brw,
                           enum brw_pipeline pipeline)
@@ -669,27 +753,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
       ctx->NewDriverState = ~0ull;
    }
 
-   if (pipeline == BRW_RENDER_PIPELINE) {
-      if (brw->fragment_program != ctx->FragmentProgram._Current) {
-         brw->fragment_program = ctx->FragmentProgram._Current;
-         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
-      }
-
-      if (brw->geometry_program != ctx->GeometryProgram._Current) {
-         brw->geometry_program = ctx->GeometryProgram._Current;
-         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
-      }
-
-      if (brw->vertex_program != ctx->VertexProgram._Current) {
-         brw->vertex_program = ctx->VertexProgram._Current;
-         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
-      }
-   }
-
-   if (brw->compute_program != ctx->ComputeProgram._Current) {
-      brw->compute_program = ctx->ComputeProgram._Current;
-      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
-   }
+   select_shader_programs(brw, pipeline);
 
    if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
       brw->meta_in_progress = _mesa_meta_in_progress(ctx);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 6e9848fb1e..9e6b8ec626 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -98,7 +98,6 @@ brw_codegen_vs_prog(struct brw_context *brw,
    struct brw_vs_prog_data prog_data;
    struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
    void *mem_ctx;
-   int i;
    struct gl_shader *vs = NULL;
 
    if (prog)
@@ -142,44 +141,14 @@ brw_codegen_vs_prog(struct brw_context *brw,
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    stage_prog_data->nr_params = param_count;
 
-   GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
    prog_data.inputs_read = vp->program.Base.InputsRead;
 
    if (c.key.copy_edgeflag) {
-      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
       prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
    }
 
-   if (brw->gen < 6) {
-      /* Put dummy slots into the VUE for the SF to put the replaced
-       * point sprite coords in. We shouldn't need these dummy slots,
-       * which take up precious URB space, but it would mean that the SF
-       * doesn't get nice aligned pairs of input coords into output
-       * coords, which would be a pain to handle.
-       */
-      for (i = 0; i < 8; i++) {
-         if (c.key.point_coord_replace & (1 << i))
-            outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
-      }
-
-      /* if back colors are written, allocate slots for front colors too */
-      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
-         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
-      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
-         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
-   }
-
-   /* In order for legacy clipping to work, we need to populate the clip
-    * distance varying slots whenever clipping is enabled, even if the vertex
-    * shader doesn't write to gl_ClipDistance.
-    */
-   if (c.key.base.userclip_active) {
-      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
-      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
-   }
-
-   brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &prog_data.base.vue_map, outputs_written);
+   /* BRW_NEW_VUE_MAP_VS */
+   prog_data.base.vue_map = brw->vue_map_vs;
 
    if (0) {
       _mesa_fprint_program_opt(stderr, &c.vp->program.Base, PROG_PRINT_DEBUG,
@@ -312,7 +281,8 @@ brw_vs_state_dirty(struct brw_context *brw)
                           _NEW_TEXTURE |
                           _NEW_TRANSFORM,
                           BRW_NEW_VERTEX_PROGRAM |
-                          BRW_NEW_VS_ATTRIB_WORKAROUNDS);
+                          BRW_NEW_VS_ATTRIB_WORKAROUNDS |
+                          BRW_NEW_VUE_MAP_VS);
 }
 
 static void
@@ -388,19 +358,6 @@ brw_upload_vs_prog(struct brw_context *brw)
       assert(success);
    }
    brw->vs.base.prog_data = &brw->vs.prog_data->base.base;
-
-   if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out,
-              sizeof(brw->vue_map_geom_out)) != 0) {
-      brw->vue_map_vs = brw->vs.prog_data->base.vue_map;
-      brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_VS;
-      if (brw->gen < 6) {
-         /* No geometry shader support, so the VS VUE map is the VUE map for
-          * the output of the "geometry" portion of the pipeline.
-          */
-         brw->vue_map_geom_out = brw->vue_map_vs;
-         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-      }
-   }
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c
index 76875789ba..ef54067b7a 100644
--- a/src/mesa/drivers/dri/i965/brw_vue_map.c
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -146,3 +146,64 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
       }
    }
 }
+
+/**
+ * Compute the necessary VUE output slots for a program.
+ *
+ * prog->OutputsWritten contains a bitfield of which varyings are actually
+ * written by the program. Unfortunately, this is insufficient to determine
+ * the VUE output format. For example, if clipping is enabled, we need to
+ * populate clip distance slots.
+ */
+GLbitfield64
+brw_compute_output_vue_slots(const struct brw_context *brw,
+                             const struct gl_program *prog)
+{
+   const struct gl_context *ctx = &brw->ctx;
+
+   /* Start with the outputs written by the program. */
+   GLbitfield64 outputs = prog->OutputsWritten;
+
+   /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
+    * are stored in the first VUE slot (VARYING_SLOT_PSIZ). Ignore them.
+    */
+   outputs &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+   /* _NEW_TRANSFORM
+    * In order for legacy clipping to work, we need to populate the clip
+    * distance varying slots whenever clipping is enabled, even if the vertex
+    * shader doesn't write to gl_ClipDistance.
+    */
+   if (ctx->Transform.ClipPlanesEnabled != 0) {
+      outputs |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
+      outputs |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
+   }
+
+   if (brw->gen < 6) {
+      /* _NEW_POLYGON */
+      if (ctx->Polygon.FrontMode != GL_FILL ||
+          ctx->Polygon.BackMode != GL_FILL) {
+         outputs |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
+      }
+
+      /* _NEW_POINT
+       * Put dummy slots into the VUE for the SF to put the replaced
+       * point sprite coords in. We shouldn't need these dummy slots,
+       * which take up precious URB space, but it would mean that the SF
+       * doesn't get nice aligned pairs of input coords into output
+       * coords, which would be a pain to handle.
+       */
+      for (int i = 0; i < 8; i++) {
+         if (ctx->Point.CoordReplace[i])
+            outputs |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
+      }
+
+      /* if back colors are written, allocate slots for front colors too */
+      if (outputs & BITFIELD64_BIT(VARYING_SLOT_BFC0))
+         outputs |= BITFIELD64_BIT(VARYING_SLOT_COL0);
+      if (outputs & BITFIELD64_BIT(VARYING_SLOT_BFC1))
+         outputs |= BITFIELD64_BIT(VARYING_SLOT_COL1);
+   }
+
+   return outputs;
+}