summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Justen <jordan.l.justen@intel.com>2014-09-23 16:46:39 -0700
committerJordan Justen <jordan.l.justen@intel.com>2014-10-27 11:52:28 -0700
commita720933b113c6584fdacbde6780e2f6ee48af77c (patch)
tree82d43d2a09fa65417a63c6b3cbb86f405e441f7e
parent3220b79f2bb2020a144bac13c0eac4a89a94a0a6 (diff)
wip - some signs of life for uniforms
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.cpp139
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c1
5 files changed, 137 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index c9ee403325..030d5c1f3a 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -256,28 +256,39 @@ brw_upload_cs_state(struct brw_context *brw)
prog_data->binding_table.size_bytes,
32, &stage_state->bind_bo_offset);
+ unsigned push_constant_size = prog_data->nr_params * sizeof(gl_constant_value);
+ unsigned reg_aligned_constant_size = ALIGN(push_constant_size, 32);
+ const unsigned threads = brw->max_vs_threads;
+
BEGIN_BATCH(8);
OUT_BATCH(MEDIA_VFE_STATE << 16 | (8 - 2));
OUT_BATCH(0); // dw1
OUT_BATCH((brw->max_vs_threads - 1) << 16 | 0xc4); // dw2
OUT_BATCH(0); // dw3
- OUT_BATCH(0); // dw4
+ OUT_BATCH((reg_aligned_constant_size / 32) * threads + 32); // dw4
OUT_BATCH(0); // dw5
OUT_BATCH(0); // dw6
OUT_BATCH(0); // dw7
ADVANCE_BATCH();
+ if (reg_aligned_constant_size > 0) {
+ BEGIN_BATCH(4);
+ OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(reg_aligned_constant_size * threads);
+ OUT_BATCH(stage_state->push_const_offset);
+ ADVANCE_BATCH();
+ }
+
/* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
memcpy(bind, stage_state->surf_offset,
prog_data->binding_table.size_bytes);
memset(desc, 0, 8 * 4);
- const unsigned simd_size = cs_prog_data->simd_size;
- unsigned thread_width_max = ALIGN(cs_prog_data->local_size[0], simd_size);
-
desc[0] = brw->cs.base.prog_offset;
desc[3] = stage_state->bind_bo_offset;
+ desc[4] = (((reg_aligned_constant_size / 32) + 0) << 16);
BEGIN_BATCH(4);
OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
@@ -291,9 +302,125 @@ brw_upload_cs_state(struct brw_context *brw)
extern "C"
const struct brw_tracked_state brw_cs_state = {
.dirty = {
- .mesa = 0,
- .brw = BRW_NEW_COMPUTE_PROGRAM,
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_PUSH_CONSTANT_ALLOCATION,
.cache = (CACHE_NEW_CS_PROG)
},
.emit = brw_upload_cs_state
};
+
+/**
+ * Creates a region containing the push constants for the CS on gen7+.
+ *
+ * Push constants are constant values (such as GLSL uniforms) that are
+ * pre-loaded into a shader stage's register space at thread spawn time.
+ *
+ * One copy of the parameter values is written per hardware thread.  Each
+ * copy is padded up to a multiple of 32 bytes, the same rounding that
+ * brw_upload_cs_state() applies when it sizes MEDIA_CURBE_LOAD and the
+ * per-thread read length, so that both functions agree on the layout.
+ *
+ * Not all GLSL uniforms will be uploaded as push constants: The hardware has
+ * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
+ * uploaded as push constants, while GL 4.4 requires at least 1024 components
+ * to be usable for the VS.  Plus, currently we always use pull constants
+ * instead of push constants when doing variable-index array access.
+ *
+ * For other stages, see brw_curbe.c for the equivalent gen4/5 code and
+ * gen6_vs_state.c for gen6+.
+ *
+ * \param brw          Driver context; supplies the thread count and is
+ *                     passed to brw_state_batch().
+ * \param prog         Program whose Parameters list is refreshed through
+ *                     _mesa_load_state_parameters() before the upload.
+ * \param prog_data    Provides nr_params and the param[] value pointers.
+ * \param stage_state  Receives push_const_offset and push_const_size.
+ * \param type         State struct type tag passed through to
+ *                     brw_state_batch().
+ */
+static void
+brw_upload_cs_push_constants(struct brw_context *brw,
+                             const struct gl_program *prog,
+                             const struct brw_stage_prog_data *prog_data,
+                             struct brw_stage_state *stage_state,
+                             enum aub_state_struct_type type)
+{
+   struct gl_context *ctx = &brw->ctx;
+
+   /* Updates the ParameterValues[i] pointers for all parameters of the
+    * basic type of PROGRAM_STATE_VAR.
+    */
+   /* XXX: Should this happen somewhere before to get our state flag set? */
+   _mesa_load_state_parameters(ctx, prog->Parameters);
+
+   if (prog_data->nr_params == 0) {
+      stage_state->push_const_size = 0;
+      return;
+   }
+
+   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+
+   /* Per-thread payload in bytes, then rounded up to whole 32-byte
+    * registers.  This must be the same rounding brw_upload_cs_state() uses
+    * for MEDIA_CURBE_LOAD, otherwise every thread after the first would
+    * find its constants at the wrong offset.
+    */
+   const unsigned push_constant_size =
+      prog_data->nr_params * sizeof(gl_constant_value);
+   const unsigned reg_aligned_size = ALIGN(push_constant_size, 32);
+
+   /* Stride between two threads' copies, counted in gl_constant_value
+    * elements rather than bytes, because it indexes the param[] array.
+    */
+   const unsigned param_aligned_count =
+      reg_aligned_size / sizeof(gl_constant_value);
+
+   /* TODO: We might be able to shrink this based on the simd size
+    * and local group size.
+    */
+   const unsigned threads = brw->max_vs_threads;
+
+   gl_constant_value *param = (gl_constant_value *)
+      brw_state_batch(brw, type,
+                      reg_aligned_size * threads,
+                      32, &stage_state->push_const_offset);
+   assert(param);
+
+   /* _NEW_PROGRAM_CONSTANTS
+    *
+    * NOTE(review): the comment this was copied from also lists
+    * _NEW_TRANSFORM (clip planes); the CS atom does not flag it.
+    */
+   for (unsigned t = 0; t < threads; t++) {
+      gl_constant_value *thread_params = param + t * param_aligned_count;
+
+      for (unsigned i = 0; i < prog_data->nr_params; i++)
+         thread_params[i] = *prog_data->param[i];
+   }
+
+   /* Per-thread size in registers of 8 values each. */
+   stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
+
+   /* We can only push 32 registers of constants at a time. */
+
+   /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS):
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to
+    *      32"
+    *
+    * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS):
+    *
+    *     "The sum of all four read length fields must be less than or
+    *      equal to the size of 64"
+    *
+    * The other shader stages all match the VS's limits.
+    */
+   assert(stage_state->push_const_size <= 32);
+}
+
+/**
+ * Emit hook for the CS push-constant atom.
+ *
+ * Does nothing when no compute program is bound; otherwise forwards the
+ * bound program, its compiled prog_data and the CS stage state to
+ * brw_upload_cs_push_constants().
+ */
+static void
+gen6_upload_cs_push_constants(struct brw_context *brw)
+{
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   const struct brw_compute_program *compute_prog =
+      (struct brw_compute_program *) brw->compute_program;
+
+   if (!compute_prog)
+      return;
+
+   /* CACHE_NEW_CS_PROG */
+   struct brw_stage_prog_data *cs_prog_data = &brw->cs.prog_data->base;
+
+   brw_upload_cs_push_constants(brw,
+                                &compute_prog->program.Base,
+                                cs_prog_data,
+                                &brw->cs.base,
+                                AUB_TRACE_VS_CONSTANTS);
+}
+
+/**
+ * State atom for the CS push constants.
+ *
+ * Re-emitted when program constants change, when a different compute
+ * program is bound, when the push constant allocation changes, or when a
+ * new CS program is pulled from the cache.
+ *
+ * Defined with C linkage, like brw_cs_state in this file, because the atom
+ * list that references it lives in a C source file.
+ */
+extern "C"
+const struct brw_tracked_state gen7_cs_push_constants = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .brw = BRW_NEW_COMPUTE_PROGRAM |
+             BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+      .cache = CACHE_NEW_CS_PROG,
+   },
+   .emit = gen6_upload_cs_push_constants,
+};
+
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 7aee1d5235..f7b3593f46 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2401,6 +2401,7 @@ enum brw_wm_barycentric_interp_mode {
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
#define MEDIA_VFE_STATE 0x7000
+#define MEDIA_CURBE_LOAD 0x7001
#define MEDIA_STATE_FLUSH 0x7004
#define GPGPU_OBJECT 0x7104
#define GPGPU_WALKER 0x7105
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a9802fdf1b..30a722ab6b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3329,7 +3329,7 @@ fs_visitor::setup_payload_gen6()
assert(brw->gen >= 6);
/* R0-1: masks, pixel X/Y coordinates. */
- payload.num_regs = 2;
+ payload.num_regs = (stage == MESA_SHADER_COMPUTE) ? 1 : 2;
/* R2: only for 32-pixel dispatch.*/
/* R3-26: barycentric interpolation coordinates. These appear in the
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index d070ba365f..579a7ea9c9 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -96,6 +96,7 @@ extern const struct brw_tracked_state brw_vertices;
extern const struct brw_tracked_state brw_index_buffer;
extern const struct brw_tracked_state brw_cs_prog;
extern const struct brw_tracked_state brw_cs_state;
+extern const struct brw_tracked_state gen7_cs_push_constants;
extern const struct brw_tracked_state gen6_binding_table_pointers;
extern const struct brw_tracked_state gen6_blend_state;
extern const struct brw_tracked_state gen6_cc_state_pointers;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 20fab33f97..fbdc7f5ca5 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -203,6 +203,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
&gen6_vs_push_constants, /* Before vs_state */
&gen6_gs_push_constants, /* Before gs_state */
&gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
+ &gen7_cs_push_constants,
/* Surface state setup. Must come before the VS/WM unit. The binding
* table upload must be last.