summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2015-05-15 10:03:42 -0700
committerCarl Worth <cworth@cworth.org>2015-06-11 11:13:29 -0700
commit7180a61d68ba467960f989bbcec58b6f02ac8212 (patch)
tree6000d800a75d39bcacdd75adbe50db38ca3521d5
parent588d13f18dca2fb5754c0d62a6d8029230b17ef6 (diff)
i965: Create a vue_maps_equal() helper. [jenkins]
Currently, this just does the existing memcmp(). It will be optimized shortly.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>

i965: Optimize VUE map comparisons.

struct brw_vue_map is 136 bytes; doing a single 8-byte comparison is much cheaper and should work just as well. Even if this changes in the future, the comparisons are now done by a centralized helper function, so it should be easy to change.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>

stash
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c30
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c106
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c51
-rw-r--r--src/mesa/drivers/dri/i965/brw_vue_map.c61
5 files changed, 156 insertions, 94 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 2dcc23c5fc..918c079cd6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -528,6 +528,8 @@ void brw_compute_vue_map(const struct brw_device_info *devinfo,
struct brw_vue_map *vue_map,
GLbitfield64 slots_valid);
+GLbitfield64 brw_compute_output_vue_slots(const struct brw_context *brw,
+ const struct gl_program *prog);
/**
* Bitmask indicating which fragment shader inputs represent varyings (and
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 52c73031a3..90aca3282c 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -115,19 +115,8 @@ brw_codegen_gs_prog(struct brw_context *brw,
c.prog_data.control_data_header_size_hwords =
ALIGN(c.control_data_header_size_bits, 256) / 256;
- GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
-
- /* In order for legacy clipping to work, we need to populate the clip
- * distance varying slots whenever clipping is enabled, even if the vertex
- * shader doesn't write to gl_ClipDistance.
- */
- if (c.key.base.userclip_active) {
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
- }
-
- brw_compute_vue_map(brw->intelScreen->devinfo,
- &c.prog_data.base.vue_map, outputs_written);
+ /* BRW_NEW_VUE_MAP_GEOM_OUT */
+ c.prog_data.base.vue_map = brw->vue_map_geom_out;
/* Compute the output vertex size.
*
@@ -297,7 +286,8 @@ brw_gs_state_dirty(struct brw_context *brw)
_NEW_TEXTURE,
BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_TRANSFORM_FEEDBACK |
- BRW_NEW_VUE_MAP_VS);
+ BRW_NEW_VUE_MAP_VS |
+ BRW_NEW_VUE_MAP_GEOM_OUT);
}
static void
@@ -339,12 +329,6 @@ brw_upload_gs_prog(struct brw_context *brw)
return;
if (gp == NULL) {
- /* No geometry shader. Vertex data just passes straight through. */
- if (brw->ctx.NewDriverState & BRW_NEW_VUE_MAP_VS) {
- brw->vue_map_geom_out = brw->vue_map_vs;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
- }
-
if (brw->gen == 6 &&
(brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) {
gen6_brw_upload_ff_gs_prog(brw);
@@ -371,12 +355,6 @@ brw_upload_gs_prog(struct brw_context *brw)
(void)success;
}
brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
-
- if (memcmp(&brw->gs.prog_data->base.vue_map, &brw->vue_map_geom_out,
- sizeof(brw->vue_map_geom_out)) != 0) {
- brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
- }
}
bool
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 84b0861aaa..6cf3deb046 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -652,6 +652,90 @@ check_and_emit_atom(struct brw_context *brw,
}
}
+/**
+ * Latch the currently bound shader programs into the brw_context and
+ * refresh the per-stage output VUE maps.
+ *
+ * For the compute pipeline, only brw->compute_program is synchronised with
+ * ctx->ComputeProgram._Current.  For the render pipeline, the vertex,
+ * geometry and fragment programs are synchronised, after which the VS and
+ * geometry-output VUE maps are recomputed if anything they depend on may
+ * have changed.  The matching BRW_NEW_*_PROGRAM and BRW_NEW_VUE_MAP_* bits
+ * are OR'd into ctx->NewDriverState for everything that actually changed.
+ *
+ * \param brw       driver context whose cached programs/VUE maps are updated
+ * \param pipeline  BRW_COMPUTE_PIPELINE or BRW_RENDER_PIPELINE
+ */
+static void
+select_shader_programs(struct brw_context *brw, enum brw_pipeline pipeline)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+
+   /* Compute is simple, handle it separately */
+   if (pipeline == BRW_COMPUTE_PIPELINE) {
+      if (brw->compute_program != ctx->ComputeProgram._Current) {
+         brw->compute_program = ctx->ComputeProgram._Current;
+         ctx->NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
+      }
+      return;
+   }
+
+   assert(pipeline == BRW_RENDER_PIPELINE);
+
+   /* Programs currently bound in core Mesa, indexed by shader stage. */
+   const struct gl_program *progs[MESA_SHADER_STAGES] = {
+      [MESA_SHADER_VERTEX] = (struct gl_program *) ctx->VertexProgram._Current,
+      [MESA_SHADER_GEOMETRY] = (struct gl_program *) ctx->GeometryProgram._Current,
+      [MESA_SHADER_FRAGMENT] = (struct gl_program *) ctx->FragmentProgram._Current
+   };
+
+   /* Where the driver caches each of those programs. */
+   const struct gl_program **brw_prog_ptrs[MESA_SHADER_STAGES] = {
+      [MESA_SHADER_VERTEX] = (const struct gl_program **) &brw->vertex_program,
+      [MESA_SHADER_GEOMETRY] = (const struct gl_program **) &brw->geometry_program,
+      [MESA_SHADER_FRAGMENT] = (const struct gl_program **) &brw->fragment_program
+   };
+
+   /* Only the stages before the fragment shader have an output VUE map.
+    * NOTE(review): sizing by MESA_SHADER_FRAGMENT assumes the fragment stage
+    * is numbered directly after the geometry stage -- confirm if stages are
+    * ever added in between.
+    */
+   struct brw_vue_map *vue_maps[MESA_SHADER_FRAGMENT] = {
+      [MESA_SHADER_VERTEX] = &brw->vue_map_vs,
+      [MESA_SHADER_GEOMETRY] = &brw->vue_map_geom_out,
+   };
+
+   static const uint64_t brw_new_program_dirty_bit[MESA_SHADER_STAGES] = {
+      [MESA_SHADER_VERTEX] = BRW_NEW_VERTEX_PROGRAM,
+      [MESA_SHADER_GEOMETRY] = BRW_NEW_GEOMETRY_PROGRAM,
+      [MESA_SHADER_FRAGMENT] = BRW_NEW_FRAGMENT_PROGRAM,
+      [MESA_SHADER_COMPUTE] = BRW_NEW_COMPUTE_PROGRAM,
+   };
+
+   static const uint64_t brw_new_vue_map_dirty_bit[MESA_SHADER_FRAGMENT] = {
+      [MESA_SHADER_VERTEX] = BRW_NEW_VUE_MAP_VS,
+      [MESA_SHADER_GEOMETRY] = BRW_NEW_VUE_MAP_GEOM_OUT,
+   };
+
+   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
+      /* Update (e.g.) brw->vertex_program and flag BRW_NEW_VERTEX_PROGRAM. */
+      if (*brw_prog_ptrs[i] != progs[i]) {
+         *brw_prog_ptrs[i] = progs[i];
+         ctx->NewDriverState |= brw_new_program_dirty_bit[i];
+      }
+   }
+
+   /* Recompute the VUE maps for each stage, if necessary, and flag
+    * BRW_NEW_VUE_MAP_<stage>.
+    */
+   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+      /* A stage's layout depends on its own program and on the GL state read
+       * by brw_compute_output_vue_slots().  A disabled stage passes the
+       * upstream layout straight through, so a change to the upstream VUE
+       * map (possibly flagged by the previous iteration of this very loop)
+       * has to trigger a re-check as well.
+       */
+      uint64_t brw_bits = brw_new_program_dirty_bit[i];
+      if (i > MESA_SHADER_VERTEX)
+         brw_bits |= brw_new_vue_map_dirty_bit[i - 1];
+
+      /* See brw_compute_output_vue_slots */
+      if (!brw_state_dirty(brw, _NEW_TRANSFORM | _NEW_POINT | _NEW_POLYGON,
+                           brw_bits))
+         continue;
+
+      GLbitfield64 outputs;
+      if (progs[i]) {
+         /* The program exists; compute its output layout. */
+         outputs = brw_compute_output_vue_slots(brw, progs[i]);
+      } else {
+         /* This stage is disabled; the previous active stage's outputs are
+          * passed through.  Walk backwards with a dedicated index (never the
+          * outer loop counter) and stop at the first stage.
+          */
+         int prev_stage = i - 1;
+         while (prev_stage >= MESA_SHADER_VERTEX && !progs[prev_stage])
+            prev_stage--;
+
+         /* No enabled upstream stage: there is nothing to inherit, so leave
+          * this stage's VUE map untouched rather than indexing out of bounds.
+          */
+         if (prev_stage < MESA_SHADER_VERTEX)
+            continue;
+
+         outputs = vue_maps[prev_stage]->slots_valid;
+      }
+
+      /* If the outputs changed, recompute the VUE map and flag the bit. */
+      if (vue_maps[i]->slots_valid != outputs) {
+         brw_compute_vue_map(devinfo, vue_maps[i], outputs);
+         ctx->NewDriverState |= brw_new_vue_map_dirty_bit[i];
+      }
+   }
+}
+
+
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
enum brw_pipeline pipeline)
@@ -669,27 +753,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
ctx->NewDriverState = ~0ull;
}
- if (pipeline == BRW_RENDER_PIPELINE) {
- if (brw->fragment_program != ctx->FragmentProgram._Current) {
- brw->fragment_program = ctx->FragmentProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
- }
-
- if (brw->geometry_program != ctx->GeometryProgram._Current) {
- brw->geometry_program = ctx->GeometryProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
- }
-
- if (brw->vertex_program != ctx->VertexProgram._Current) {
- brw->vertex_program = ctx->VertexProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
- }
- }
-
- if (brw->compute_program != ctx->ComputeProgram._Current) {
- brw->compute_program = ctx->ComputeProgram._Current;
- brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
- }
+ select_shader_programs(brw, pipeline);
if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
brw->meta_in_progress = _mesa_meta_in_progress(ctx);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 6e9848fb1e..9e6b8ec626 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -98,7 +98,6 @@ brw_codegen_vs_prog(struct brw_context *brw,
struct brw_vs_prog_data prog_data;
struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
void *mem_ctx;
- int i;
struct gl_shader *vs = NULL;
if (prog)
@@ -142,44 +141,14 @@ brw_codegen_vs_prog(struct brw_context *brw,
rzalloc_array(NULL, const gl_constant_value *, param_count);
stage_prog_data->nr_params = param_count;
- GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
prog_data.inputs_read = vp->program.Base.InputsRead;
if (c.key.copy_edgeflag) {
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
}
- if (brw->gen < 6) {
- /* Put dummy slots into the VUE for the SF to put the replaced
- * point sprite coords in. We shouldn't need these dummy slots,
- * which take up precious URB space, but it would mean that the SF
- * doesn't get nice aligned pairs of input coords into output
- * coords, which would be a pain to handle.
- */
- for (i = 0; i < 8; i++) {
- if (c.key.point_coord_replace & (1 << i))
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
- }
-
- /* if back colors are written, allocate slots for front colors too */
- if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
- if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
- }
-
- /* In order for legacy clipping to work, we need to populate the clip
- * distance varying slots whenever clipping is enabled, even if the vertex
- * shader doesn't write to gl_ClipDistance.
- */
- if (c.key.base.userclip_active) {
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
- }
-
- brw_compute_vue_map(brw->intelScreen->devinfo,
- &prog_data.base.vue_map, outputs_written);
+ /* BRW_NEW_VUE_MAP_VS */
+ prog_data.base.vue_map = brw->vue_map_vs;
if (0) {
_mesa_fprint_program_opt(stderr, &c.vp->program.Base, PROG_PRINT_DEBUG,
@@ -312,7 +281,8 @@ brw_vs_state_dirty(struct brw_context *brw)
_NEW_TEXTURE |
_NEW_TRANSFORM,
BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_VS_ATTRIB_WORKAROUNDS);
+ BRW_NEW_VS_ATTRIB_WORKAROUNDS |
+ BRW_NEW_VUE_MAP_VS);
}
static void
@@ -388,19 +358,6 @@ brw_upload_vs_prog(struct brw_context *brw)
assert(success);
}
brw->vs.base.prog_data = &brw->vs.prog_data->base.base;
-
- if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out,
- sizeof(brw->vue_map_geom_out)) != 0) {
- brw->vue_map_vs = brw->vs.prog_data->base.vue_map;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_VS;
- if (brw->gen < 6) {
- /* No geometry shader support, so the VS VUE map is the VUE map for
- * the output of the "geometry" portion of the pipeline.
- */
- brw->vue_map_geom_out = brw->vue_map_vs;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
- }
- }
}
bool
diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c
index 76875789ba..ef54067b7a 100644
--- a/src/mesa/drivers/dri/i965/brw_vue_map.c
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -146,3 +146,64 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
}
}
}
+
+/**
+ * Work out which VUE slots a program's output must provide.
+ *
+ * The set of varyings the program actually writes (prog->OutputsWritten) is
+ * only the starting point: current GL state can require additional slots,
+ * e.g. clip distance slots while user clip planes are enabled.
+ *
+ * \param brw   driver context; its GL state and hardware generation are read
+ * \param prog  program whose written outputs seed the result
+ * \return bitfield of VARYING_SLOT_* bits the output VUE needs
+ */
+GLbitfield64
+brw_compute_output_vue_slots(const struct brw_context *brw,
+                             const struct gl_program *prog)
+{
+   const struct gl_context *ctx = &brw->ctx;
+
+   /* Seed the result with whatever the program itself writes. */
+   GLbitfield64 slots = prog->OutputsWritten;
+
+   /* gl_Layer and gl_ViewportIndex have no varying slot of their own; they
+    * live in the first VUE slot (VARYING_SLOT_PSIZ), so drop them here.
+    */
+   slots &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+   /* _NEW_TRANSFORM
+    * Legacy clipping needs the clip distance slots populated whenever
+    * clipping is enabled, whether or not the shader writes gl_ClipDistance.
+    */
+   if (ctx->Transform.ClipPlanesEnabled) {
+      slots |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) |
+               BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
+   }
+
+   /* Everything below only applies to pre-Gen6 hardware. */
+   if (brw->gen >= 6)
+      return slots;
+
+   /* _NEW_POLYGON */
+   if (!(ctx->Polygon.FrontMode == GL_FILL &&
+         ctx->Polygon.BackMode == GL_FILL)) {
+      slots |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
+   }
+
+   /* _NEW_POINT
+    * Reserve dummy slots for the SF to drop replaced point sprite
+    * coordinates into.  They cost URB space we would rather not spend, but
+    * without them the SF would not see neatly aligned input/output
+    * coordinate pairs, which would be painful to deal with.
+    */
+   for (int unit = 0; unit < 8; unit++) {
+      if (!ctx->Point.CoordReplace[unit])
+         continue;
+      slots |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + unit);
+   }
+
+   /* Writing a back color implies a slot for the matching front color. */
+   if (slots & BITFIELD64_BIT(VARYING_SLOT_BFC0))
+      slots |= BITFIELD64_BIT(VARYING_SLOT_COL0);
+   if (slots & BITFIELD64_BIT(VARYING_SLOT_BFC1))
+      slots |= BITFIELD64_BIT(VARYING_SLOT_COL1);
+
+   return slots;
+}