diff options
author | Eric Anholt <eric@anholt.net> | 2011-02-05 10:05:10 -0600 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2016-04-24 22:17:27 -0700 |
commit | 47054ef56009c76ef5df30927589a7f704406b76 (patch) | |
tree | e2843d9327f416143d5f94c273271e74a1d8600d | |
parent | 1bc983cd649af10d87500ceda37d7a36aa3741a6 (diff) |
i965: Add support for URB transposed reads present on g45 through ILK. [dead/g45-transposed-read]
This saves SF URB size by having the WM unit do the transpose from the
SF-friendly coefficient-major layout to the WM-friendly
attribute-major layout.
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 1 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 1 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_device_info.c | 2 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_device_info.h | 1 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_sf.c | 29 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_sf_emit.c | 70 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_state.c | 2 |
7 files changed, 65 insertions, 41 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 63ac3bc31ed..adca06ab7b0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -835,6 +835,7 @@ brwCreateContext(gl_api api, brw->has_hiz = devinfo->has_hiz_and_separate_stencil; brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil; brw->has_pln = devinfo->has_pln; + brw->has_transposed_read = devinfo->has_transposed_read; brw->has_compr4 = devinfo->has_compr4; brw->has_surface_tile_offset = devinfo->has_surface_tile_offset; brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1d3d5b2a1fb..cdc4661c620 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -873,6 +873,7 @@ struct brw_context bool has_compr4; bool has_negative_rhw_bug; bool has_pln; + bool has_transposed_read; bool no_simd8; bool use_rep_send; bool use_resource_streamer; diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 3666190fc36..47b26517912 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -40,6 +40,7 @@ static const struct brw_device_info brw_device_info_i965 = { static const struct brw_device_info brw_device_info_g4x = { .gen = 4, .has_pln = true, + .has_transposed_read = true, .has_compr4 = true, .has_surface_tile_offset = true, .is_g4x = true, @@ -55,6 +56,7 @@ static const struct brw_device_info brw_device_info_g4x = { static const struct brw_device_info brw_device_info_ilk = { .gen = 5, .has_pln = true, + .has_transposed_read = true, .has_compr4 = true, .has_surface_tile_offset = true, .num_slices = 1, diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h index 4e7f3135960..3a296f60eed 100644 --- 
a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@ -46,6 +46,7 @@ struct brw_device_info bool has_llc; bool has_pln; + bool has_transposed_read; bool has_compr4; bool has_surface_tile_offset; bool supports_simd16_3src; diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 5acbf189969..8ff5c8a9e23 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -78,7 +78,34 @@ static void compile_sf_prog( struct brw_context *brw, c.nr_setup_regs = c.nr_attr_regs; c.prog_data.urb_read_length = c.nr_attr_regs; - c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + /* Number of 512-bit URB rows produced. */ + if (brw->has_transposed_read) { + /* Transposed reads: The 3 coefficients we produce are packed + * in the URB entry: + * + * row0: a0.x_x a0.y_x a0.z_x a0.w_x a1.x_x a1.y_x a1.z_x a1.w_x + * a0.x_y a0.y_y a0.z_y a0.w_y a1.x_y a1.y_y a1.z_y a1.w_y + * row1: a0.x_c a0.y_c a0.z_c a0.w_c a1.x_c a1.y_c a1.z_c a1.w_c + * a2.x_x a2.y_x a2.z_x a2.w_x a3.x_x a3.y_x a3.z_x a3.w_x + * row2: a2.x_y a2.y_y a2.z_y a2.w_y a3.x_y a3.y_y a3.z_y a3.w_y + * a2.x_c a2.y_c a2.z_c a2.w_c a3.x_c a3.y_c a3.z_c a3.w_c + * + * The WM gets programmed as if it was reading from the else + * block below. + */ + c.prog_data.urb_entry_size = (c.nr_setup_attrs + 3 / 4) * 3; + } else { + /* Transposed writes into URB. The rows look like: + * + * row0: a0.x_x a0.x_y null a0.x_c a0.y_x a0.y_y null a0.y_c + * a0.z_x a0.z_y null a0.z_c a0.w_x a0.w_y null a0.w_c + * + * So we use a whole row per attribute (and since we write + * two-attribute groups to the URB, align the size in case the + * disabled last attribute actually gets written). + */ + c.prog_data.urb_entry_size = ALIGN(c.nr_setup_attrs, 2); + } c.has_flat_shading = brw_any_flat_varyings(&key->interpolation_mode); /* Which primitive? Or all three? 
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index f03b74143f4..fbf528a8bda 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -41,6 +41,32 @@ #include "brw_util.h" #include "brw_sf.h" +static void +do_urb_write(struct brw_sf_compile *c, int attr_pair, bool last) +{ + struct brw_codegen *p = &c->func; + const struct brw_device_info *devinfo = p->devinfo; + uint32_t offset, swizzle; + + if (devinfo->has_transposed_read) { + offset = attr_pair * 3; + swizzle = BRW_URB_SWIZZLE_NONE; + } else { + offset = attr_pair * 4; + swizzle = BRW_URB_SWIZZLE_TRANSPOSE; + } + + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + last ? BRW_URB_WRITE_EOT_COMPLETE + : BRW_URB_WRITE_NO_FLAGS, + 4, /* msg len */ + 0, /* response len */ + offset, + swizzle); +} /** * Determine the vue slot corresponding to the given half of the given register. @@ -482,16 +508,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in * the send instruction: */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ - last ? BRW_URB_WRITE_EOT_COMPLETE - : BRW_URB_WRITE_NO_FLAGS, - 4, /* msg len */ - 0, /* response len */ - i*4, /* offset */ - BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + do_urb_write(c, i, last); } } @@ -556,16 +573,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate) /* Copy m0..m3 to URB. */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - last ? 
BRW_URB_WRITE_EOT_COMPLETE - : BRW_URB_WRITE_NO_FLAGS, - 4, /* msg len */ - 0, /* response len */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + do_urb_write(c, i, last); } } @@ -646,16 +654,7 @@ void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate) set_predicate_control_flag_value(p, c, pc); /* Copy m0..m3 to URB. */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - last ? BRW_URB_WRITE_EOT_COMPLETE - : BRW_URB_WRITE_NO_FLAGS, - 4, /* msg len */ - 0, /* response len */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + do_urb_write(c, i, last); } brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); @@ -707,16 +706,7 @@ void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate) /* Copy m0..m3 to URB. */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - last ? BRW_URB_WRITE_EOT_COMPLETE - : BRW_URB_WRITE_NO_FLAGS, - 4, /* msg len */ - 0, /* response len */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + do_urb_write(c, i, last); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 6bf0a55e418..ab39baf5f95 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -206,6 +206,8 @@ brw_upload_wm_unit(struct brw_context *brw) /* _NEW_POLYGONSTIPPLE */ wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag; + if (brw->has_transposed_read) + wm->wm5.transposed_urb_read_enable = 1; /* _NEW_POLYGON */ if (ctx->Polygon.OffsetFill) { |