diff options
author | Jordan Justen <jordan.l.justen@intel.com> | 2014-09-23 16:46:39 -0700 |
---|---|---|
committer | Jordan Justen <jordan.l.justen@intel.com> | 2014-10-27 11:52:28 -0700 |
commit | a720933b113c6584fdacbde6780e2f6ee48af77c (patch) | |
tree | 82d43d2a09fa65417a63c6b3cbb86f405e441f7e | |
parent | 3220b79f2bb2020a144bac13c0eac4a89a94a0a6 (diff) |
wip - some signs of life for uniforms
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.cpp | 139 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 1 |
5 files changed, 137 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index c9ee403325..030d5c1f3a 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -256,28 +256,39 @@ brw_upload_cs_state(struct brw_context *brw) prog_data->binding_table.size_bytes, 32, &stage_state->bind_bo_offset); + unsigned push_constant_size = prog_data->nr_params * sizeof(gl_constant_value); + unsigned reg_aligned_constant_size = ALIGN(push_constant_size, 32); + const unsigned threads = brw->max_vs_threads; + BEGIN_BATCH(8); OUT_BATCH(MEDIA_VFE_STATE << 16 | (8 - 2)); OUT_BATCH(0); // dw1 OUT_BATCH((brw->max_vs_threads - 1) << 16 | 0xc4); // dw2 OUT_BATCH(0); // dw3 - OUT_BATCH(0); // dw4 + OUT_BATCH((reg_aligned_constant_size / 32) * threads + 32); // dw4 OUT_BATCH(0); // dw5 OUT_BATCH(0); // dw6 OUT_BATCH(0); // dw7 ADVANCE_BATCH(); + if (reg_aligned_constant_size > 0) { + BEGIN_BATCH(4); + OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(reg_aligned_constant_size * threads); + OUT_BATCH(stage_state->push_const_offset); + ADVANCE_BATCH(); + } + /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */ memcpy(bind, stage_state->surf_offset, prog_data->binding_table.size_bytes); memset(desc, 0, 8 * 4); - const unsigned simd_size = cs_prog_data->simd_size; - unsigned thread_width_max = ALIGN(cs_prog_data->local_size[0], simd_size); - desc[0] = brw->cs.base.prog_offset; desc[3] = stage_state->bind_bo_offset; + desc[4] = (((reg_aligned_constant_size / 32) + 0) << 16); BEGIN_BATCH(4); OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2)); @@ -291,9 +302,125 @@ brw_upload_cs_state(struct brw_context *brw) extern "C" const struct brw_tracked_state brw_cs_state = { .dirty = { - .mesa = 0, - .brw = BRW_NEW_COMPUTE_PROGRAM, + .mesa = _NEW_PROGRAM_CONSTANTS, + .brw = BRW_NEW_COMPUTE_PROGRAM | + BRW_NEW_PUSH_CONSTANT_ALLOCATION, .cache = (CACHE_NEW_CS_PROG) }, .emit = brw_upload_cs_state }; + +/** + * Creates a region 
containing the push constants for the CS on gen7+. + * + * Push constants are constant values (such as GLSL uniforms) that are + * pre-loaded into a shader stage's register space at thread spawn time. + * + * Not all GLSL uniforms will be uploaded as push constants: The hardware has + * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be + * uploaded as push constants, while GL 4.4 requires at least 1024 components + * to be usable for the VS. Plus, currently we always use pull constants + * instead of push constants when doing variable-index array access. + * + * For other stages, see brw_curbe.c for the equivalent gen4/5 code and + * gen6_vs_state.c for gen6+. + */ +static void +brw_upload_cs_push_constants(struct brw_context *brw, + const struct gl_program *prog, + const struct brw_stage_prog_data *prog_data, + struct brw_stage_state *stage_state, + enum aub_state_struct_type type) +{ + struct gl_context *ctx = &brw->ctx; + + /* Updates the ParameterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + /* XXX: Should this happen somewhere before to get our state flag set? */ + _mesa_load_state_parameters(ctx, prog->Parameters); + + if (prog_data->nr_params == 0) { + stage_state->push_const_size = 0; + } else { + gl_constant_value *param; + unsigned i, t; + + const unsigned push_constant_size = + prog_data->nr_params * sizeof(gl_constant_value); + const unsigned param_aligned_count = ALIGN(push_constant_size, 8); + const unsigned reg_count = param_aligned_count / 8; + const unsigned reg_aligned_size = 8 * param_aligned_count; + /* TODO: We might be able to shrink this based on the simd size + * and local group size. 
+ */ + const unsigned threads = brw->max_vs_threads; + + param = (gl_constant_value*) + brw_state_batch(brw, type, + reg_aligned_size * threads, + 32, &stage_state->push_const_offset); + assert(param); + + STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); + + /* _NEW_PROGRAM_CONSTANTS + * + * Also _NEW_TRANSFORM -- we may reference clip planes other than as a + * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS + * wouldn't be set for them. + */ + for (t = 0; t < threads; t++) { + for (i = 0; i < prog_data->nr_params; i++) { + param[t * param_aligned_count + i] = *prog_data->param[i]; + } + } + + stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; + /* We can only push 32 registers of constants at a time. */ + + /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to + * 32" + * + * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS: + * + * "The sum of all four read length fields must be less than or + * equal to the size of 64" + * + * The other shader stages all match the VS's limits. 
+ */ + assert(stage_state->push_const_size <= 32); + } +} + +static void +gen6_upload_cs_push_constants(struct brw_context *brw) +{ + struct brw_stage_state *stage_state = &brw->cs.base; + + /* BRW_NEW_COMPUTE_PROGRAM */ + const struct brw_compute_program *cp = + (struct brw_compute_program *) brw->compute_program; + + if (cp) { + /* CACHE_NEW_CS_PROG */ + struct brw_stage_prog_data *prog_data = &brw->cs.prog_data->base; + + brw_upload_cs_push_constants(brw, &cp->program.Base, prog_data, + stage_state, AUB_TRACE_VS_CONSTANTS); + } +} + +const struct brw_tracked_state gen7_cs_push_constants = { + .dirty = { + .mesa = _NEW_PROGRAM_CONSTANTS, + .brw = BRW_NEW_COMPUTE_PROGRAM | + BRW_NEW_PUSH_CONSTANT_ALLOCATION, + .cache = CACHE_NEW_CS_PROG, + }, + .emit = gen6_upload_cs_push_constants, +}; + diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 7aee1d5235..f7b3593f46 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2401,6 +2401,7 @@ enum brw_wm_barycentric_interp_mode { #define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002 #define MEDIA_VFE_STATE 0x7000 +#define MEDIA_CURBE_LOAD 0x7001 #define MEDIA_STATE_FLUSH 0x7004 #define GPGPU_OBJECT 0x7104 #define GPGPU_WALKER 0x7105 diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a9802fdf1b..30a722ab6b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3329,7 +3329,7 @@ fs_visitor::setup_payload_gen6() assert(brw->gen >= 6); /* R0-1: masks, pixel X/Y coordinates. */ - payload.num_regs = 2; + payload.num_regs = (stage == MESA_SHADER_COMPUTE) ? 1 : 2; /* R2: only for 32-pixel dispatch.*/ /* R3-26: barycentric interpolation coordinates. 
These appear in the diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index d070ba365f..579a7ea9c9 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -96,6 +96,7 @@ extern const struct brw_tracked_state brw_vertices; extern const struct brw_tracked_state brw_index_buffer; extern const struct brw_tracked_state brw_cs_prog; extern const struct brw_tracked_state brw_cs_state; +extern const struct brw_tracked_state gen7_cs_push_constants; extern const struct brw_tracked_state gen6_binding_table_pointers; extern const struct brw_tracked_state gen6_blend_state; extern const struct brw_tracked_state gen6_cc_state_pointers; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 20fab33f97..fbdc7f5ca5 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -203,6 +203,7 @@ static const struct brw_tracked_state *gen7_atoms[] = &gen6_vs_push_constants, /* Before vs_state */ &gen6_gs_push_constants, /* Before gs_state */ &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */ + &gen7_cs_push_constants, /* Surface state setup. Must come before the VS/WM unit. The binding * table upload must be last. |