summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Ekstrand <jason.ekstrand@intel.com>2014-12-08 17:34:52 -0800
committerJason Ekstrand <jason.ekstrand@intel.com>2014-12-17 21:08:12 -0800
commit459e872cfff7b73434b7bce4a3d6c8c288e40763 (patch)
treed9b49b63ece352d915b4667043236c7b5a6b9823
parent65c8950a5ee92a8cdc4fc0504410f0388e73ef99 (diff)
nir: Use a source for uniform buffer indices instead of an index
In GLSL-to-NIR we were just setting the base index to 0 whenever there was an indirect so having it expressed as a sum makes no sense. Also, while a base offset may make sense for the memory location (first element in the array, etc.) it makes less sense for the actual uniform buffer index. This may change later, but it seems to make more sense for now.
-rw-r--r--src/glsl/nir/glsl_to_nir.cpp8
-rw-r--r--src/glsl/nir/nir_intrinsics.h27
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp96
3 files changed, 76 insertions, 55 deletions
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index b32bea1543..47fb6d2272 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -902,11 +902,11 @@ nir_visitor::visit(ir_expression *ir)
}
nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
load->num_components = ir->type->vector_elements;
- load->const_index[0] = ir->operands[0]->as_constant()->value.u[0];
- load->const_index[1] = const_index ? const_index->value.u[0] : 0; /* base offset */
- load->const_index[2] = 1; /* number of vec4's */
+ load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
+ load->const_index[1] = 1; /* number of vec4's */
+ load->src[0] = evaluate_rvalue(ir->operands[0]);
if (!const_index)
- load->src[0] = evaluate_rvalue(ir->operands[1]);
+ load->src[1] = evaluate_rvalue(ir->operands[1]);
add_instr(&load->instr, ir->type->vector_elements);
/*
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index e66273d384..d94866c859 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -101,11 +101,11 @@ SYSTEM_VALUE(invocation_id, 1)
/*
* The first index is the address to load from, and the second index is the
- * number of array elements to load. For UBO's (and SSBO's), the first index
- * is the UBO buffer index (TODO nonconstant UBO buffer index) and the second
- * and third indices play the role of the first and second indices in the other
- * loads. Indirect loads have an additional register input, which is added
- * to the constant address to compute the final address to load from.
+ * number of array elements to load. Indirect loads have an additional
+ * register input, which is added to the constant address to compute the
+ * final address to load from. For UBO's (and SSBO's), the first source is
+ * the (possibly constant) UBO buffer index and the indirect (if it exists)
+ * is the second source.
*
* For vector backends, the address is in terms of one vec4, and so each array
* element is +4 scalar components from the previous array element. For scalar
@@ -113,16 +113,15 @@ SYSTEM_VALUE(invocation_id, 1)
* elements begin immediately after the previous array element.
*/
-#define LOAD(name, num_indices, flags) \
- INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \
- NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
- INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \
- NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+#define LOAD(name, extra_srcs, flags) \
+ INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
+ INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
+ true, 0, 0, 2, flags)
-LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)
-LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)
-LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
-/* LOAD(ssbo, 2, 0) */
+LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* LOAD(ssbo, 1, 0) */
/*
* Stores work the same way as loads, except now the first register input is
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index fde96fa943..c7610d1e86 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1344,52 +1344,74 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
+ case nir_intrinsic_load_ubo_indirect:
+ has_indirect = true;
case nir_intrinsic_load_ubo: {
- fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
- (unsigned) instr->const_index[0]);
- fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
- packed_consts.type = dest.type;
-
- fs_reg const_offset_reg = fs_reg((unsigned) instr->const_index[1] & ~15);
- emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- packed_consts, surf_index, const_offset_reg));
+ nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
+ fs_reg surf_index;
- for (unsigned i = 0; i < instr->num_components; i++) {
- packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);
+ if (const_index) {
+ surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
+ const_index->u[0]);
+ } else {
+ /* The block index is not a constant. Evaluate the index expression
+ * per-channel and add the base UBO index; the generator will select
+ * a value from any live channel.
+ */
+ surf_index = fs_reg(this, glsl_type::uint_type);
+ emit(ADD(surf_index, get_nir_src(instr->src[0]),
+ fs_reg(stage_prog_data->binding_table.ubo_start)))
+ ->force_writemask_all = true;
- /* The std140 packing rules don't allow vectors to cross 16-byte
- * boundaries, and a reg is 32 bytes.
+ /* Assume this may touch any UBO. It would be nice to provide
+ * a tighter bound, but the array information is already lowered away.
*/
- assert(packed_consts.subreg_offset < 32);
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumUniformBlocks - 1);
+ }
- fs_inst *inst = MOV(dest, packed_consts);
- if (instr->has_predicate)
- inst->predicate = BRW_PREDICATE_NORMAL;
- emit(inst);
+ if (has_indirect) {
+ /* Turn the byte offset into a dword offset. */
+ fs_reg base_offset = fs_reg(this, glsl_type::int_type);
+ emit(SHR(base_offset, retype(get_nir_src(instr->src[1]),
+ BRW_REGISTER_TYPE_D),
+ fs_reg(2)));
- dest.reg_offset++;
- }
- break;
- }
+ unsigned vec4_offset = instr->const_index[0] / 4;
+ for (int i = 0; i < instr->num_components; i++) {
+ exec_list list = VARYING_PULL_CONSTANT_LOAD(offset(dest, i),
+ surf_index, base_offset,
+ vec4_offset + i);
- case nir_intrinsic_load_ubo_indirect: {
- fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
- instr->const_index[0]);
- /* Turn the byte offset into a dword offset. */
- unsigned base_offset = instr->const_index[1] / 4;
- fs_reg offset = fs_reg(this, glsl_type::int_type);
- emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
- fs_reg(2)));
+ fs_inst *last_inst = (fs_inst *) list.get_tail();
+ if (instr->has_predicate)
+ last_inst->predicate = BRW_PREDICATE_NORMAL;
+ emit(list);
+ }
+ } else {
+ fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
+ packed_consts.type = dest.type;
- for (unsigned i = 0; i < instr->num_components; i++) {
- exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
- offset, base_offset + i);
- fs_inst *last_inst = (fs_inst *) list.get_tail();
- if (instr->has_predicate)
- last_inst->predicate = BRW_PREDICATE_NORMAL;
- emit(list);
+ fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
+ emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
+ surf_index, const_offset_reg);
- dest.reg_offset++;
+ for (unsigned i = 0; i < instr->num_components; i++) {
+ packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i);
+
+ /* The std140 packing rules don't allow vectors to cross 16-byte
+ * boundaries, and a reg is 32 bytes.
+ */
+ assert(packed_consts.subreg_offset < 32);
+
+ fs_inst *inst = MOV(dest, packed_consts);
+ if (instr->has_predicate)
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ emit(inst);
+
+ dest.reg_offset++;
+ }
}
break;
}