summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2011-02-05 10:05:10 -0600
committer: Matt Turner <mattst88@gmail.com> 2016-04-24 22:17:27 -0700
commit: 47054ef56009c76ef5df30927589a7f704406b76 (patch)
tree: e2843d9327f416143d5f94c273271e74a1d8600d
parent: 1bc983cd649af10d87500ceda37d7a36aa3741a6 (diff)
i965: Add support for URB transposed reads present on g45 through ILK. [branch: dead/g45-transposed-read]
This saves SF URB size by having the WM unit do the transpose from the SF-friendly coefficient-major layout to the WM-friendly attribute-major layout.
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_device_info.c | 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_device_info.h | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf.c | 29
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf_emit.c | 70
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_state.c | 2
7 files changed, 65 insertions, 41 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 63ac3bc31ed..adca06ab7b0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -835,6 +835,7 @@ brwCreateContext(gl_api api,
brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
brw->has_pln = devinfo->has_pln;
+ brw->has_transposed_read = devinfo->has_transposed_read;
brw->has_compr4 = devinfo->has_compr4;
brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1d3d5b2a1fb..cdc4661c620 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -873,6 +873,7 @@ struct brw_context
bool has_compr4;
bool has_negative_rhw_bug;
bool has_pln;
+ bool has_transposed_read;
bool no_simd8;
bool use_rep_send;
bool use_resource_streamer;
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 3666190fc36..47b26517912 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -40,6 +40,7 @@ static const struct brw_device_info brw_device_info_i965 = {
static const struct brw_device_info brw_device_info_g4x = {
.gen = 4,
.has_pln = true,
+ .has_transposed_read = true,
.has_compr4 = true,
.has_surface_tile_offset = true,
.is_g4x = true,
@@ -55,6 +56,7 @@ static const struct brw_device_info brw_device_info_g4x = {
static const struct brw_device_info brw_device_info_ilk = {
.gen = 5,
.has_pln = true,
+ .has_transposed_read = true,
.has_compr4 = true,
.has_surface_tile_offset = true,
.num_slices = 1,
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h
index 4e7f3135960..3a296f60eed 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -46,6 +46,7 @@ struct brw_device_info
bool has_llc;
bool has_pln;
+ bool has_transposed_read;
bool has_compr4;
bool has_surface_tile_offset;
bool supports_simd16_3src;
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 5acbf189969..8ff5c8a9e23 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -78,7 +78,34 @@ static void compile_sf_prog( struct brw_context *brw,
c.nr_setup_regs = c.nr_attr_regs;
c.prog_data.urb_read_length = c.nr_attr_regs;
- c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+ /* Number of 512-bit URB rows produced. */
+ if (brw->has_transposed_read) {
+ /* Transposed reads: The 3 coefficients we produce are packed
+ * in the URB entry:
+ *
+ * row0: a0.x_x a0.y_x a0.z_x a0.w_x a1.x_x a1.y_x a1.z_x a1.w_x
+ * a0.x_y a0.y_y a0.z_y a0.w_y a1.x_y a1.y_y a1.z_y a1.w_y
+ * row1: a0.x_c a0.y_c a0.z_c a0.w_c a1.x_c a1.y_c a1.z_c a1.w_c
+ * a2.x_x a2.y_x a2.z_x a2.w_x a3.x_x a3.y_x a3.z_x a3.w_x
+ * row2: a2.x_y a2.y_y a2.z_y a2.w_y a3.x_y a3.y_y a3.z_y a3.w_y
+ * a2.x_c a2.y_c a2.z_c a2.w_c a3.x_c a3.y_c a3.z_c a3.w_c
+ *
+ * Every group of 4 attributes takes 3 rows, so round the attribute
+ * count up to a whole group of 4 before scaling. The WM gets
+ * programmed as if it was reading from the else block below.
+ */
+ c.prog_data.urb_entry_size = ((c.nr_setup_attrs + 3) / 4) * 3;
+ } else {
+ /* Transposed writes into URB. The rows look like:
+ *
+ * row0: a0.x_x a0.x_y null a0.x_c a0.y_x a0.y_y null a0.y_c
+ * a0.z_x a0.z_y null a0.z_c a0.w_x a0.w_y null a0.w_c
+ *
+ * So we use a whole row per attribute (and since we write
+ * two-attribute groups to the URB, align the size in case the
+ * disabled last attribute actually gets written).
+ */
+ c.prog_data.urb_entry_size = ALIGN(c.nr_setup_attrs, 2);
+ }
c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode);
/* Which primitive? Or all three?
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index f03b74143f4..fbf528a8bda 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -41,6 +41,32 @@
#include "brw_util.h"
#include "brw_sf.h"
+/**
+ * Emit the URB write that stores one attribute pair's setup results.
+ *
+ * The destination offset and swizzle mode depend on
+ * devinfo->has_transposed_read: with it, the offset is attr_pair * 3 and
+ * no swizzle is applied; without it, the offset is attr_pair * 4 and the
+ * write uses BRW_URB_SWIZZLE_TRANSPOSE.
+ *
+ * \param c         SF compile state; instructions are emitted into c->func.
+ * \param attr_pair index of the attribute pair being written.
+ * \param last      true for the final write, which is flagged
+ *                  BRW_URB_WRITE_EOT_COMPLETE instead of
+ *                  BRW_URB_WRITE_NO_FLAGS.
+ */
+static void
+do_urb_write(struct brw_sf_compile *c, int attr_pair, bool last)
+{
+ struct brw_codegen *p = &c->func;
+ const struct brw_device_info *devinfo = p->devinfo;
+ uint32_t offset, swizzle;
+
+ if (devinfo->has_transposed_read) {
+ /* NOTE(review): presumably 3 because each pair contributes three
+ * payload registers (m1..m3) that are stored untransposed - confirm.
+ */
+ offset = attr_pair * 3;
+ swizzle = BRW_URB_SWIZZLE_NONE;
+ } else {
+ offset = attr_pair * 4;
+ swizzle = BRW_URB_SWIZZLE_TRANSPOSE;
+ }
+
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+ last ? BRW_URB_WRITE_EOT_COMPLETE
+ : BRW_URB_WRITE_NO_FLAGS,
+ 4, /* msg len: m0..m3 */
+ 0, /* response len */
+ offset,
+ swizzle);
+}
/**
* Determine the vue slot corresponding to the given half of the given register.
@@ -482,16 +508,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
/* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
* the send instruction:
*/
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
- last ? BRW_URB_WRITE_EOT_COMPLETE
- : BRW_URB_WRITE_NO_FLAGS,
- 4, /* msg len */
- 0, /* response len */
- i*4, /* offset */
- BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+ do_urb_write(c, i, last);
}
}
@@ -556,16 +573,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
/* Copy m0..m3 to URB.
*/
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0),
- last ? BRW_URB_WRITE_EOT_COMPLETE
- : BRW_URB_WRITE_NO_FLAGS,
- 4, /* msg len */
- 0, /* response len */
- i*4, /* urb destination offset */
- BRW_URB_SWIZZLE_TRANSPOSE);
+ do_urb_write(c, i, last);
}
}
@@ -646,16 +654,7 @@ void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
set_predicate_control_flag_value(p, c, pc);
/* Copy m0..m3 to URB. */
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0),
- last ? BRW_URB_WRITE_EOT_COMPLETE
- : BRW_URB_WRITE_NO_FLAGS,
- 4, /* msg len */
- 0, /* response len */
- i*4, /* urb destination offset */
- BRW_URB_SWIZZLE_TRANSPOSE);
+ do_urb_write(c, i, last);
}
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
@@ -707,16 +706,7 @@ void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
/* Copy m0..m3 to URB.
*/
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0),
- last ? BRW_URB_WRITE_EOT_COMPLETE
- : BRW_URB_WRITE_NO_FLAGS,
- 4, /* msg len */
- 0, /* response len */
- i*4, /* urb destination offset */
- BRW_URB_SWIZZLE_TRANSPOSE);
+ do_urb_write(c, i, last);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 6bf0a55e418..ab39baf5f95 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -206,6 +206,8 @@ brw_upload_wm_unit(struct brw_context *brw)
/* _NEW_POLYGONSTIPPLE */
wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag;
+ if (brw->has_transposed_read)
+ wm->wm5.transposed_urb_read_enable = 1;
/* _NEW_POLYGON */
if (ctx->Polygon.OffsetFill) {