diff options
author | Topi Pohjolainen <topi.pohjolainen@intel.com> | 2016-11-08 14:19:15 +0200 |
---|---|---|
committer | Topi Pohjolainen <topi.pohjolainen@intel.com> | 2016-11-25 16:57:07 +0200 |
commit | 544ed74315954919708da8d43263a8ff4f1d0f21 (patch) | |
tree | d39cef20403192d62dd5987e1fad0c9c985dad14 | |
parent | 96dbe765e1c06566a50393714dcbe86ebde5195a (diff) |
i965: Use ISL for CCS layouts
One can now also delete intel_get_non_msrt_mcs_alignment().
v2 (Jason): Do not leak aux buf but allocate only after getting
ISL surfaces.
Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 138 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 4 |
2 files changed, 38 insertions, 104 deletions
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index d629901bf3..d061cd43f5 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -101,66 +101,6 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format, } } - -/** - * For single-sampled render targets ("non-MSRT"), the MCS buffer is a - * scaled-down bitfield representation of the color buffer which is capable of - * recording when blocks of the color buffer are equal to the clear value. - * This function returns the block size that will be used by the MCS buffer - * corresponding to a certain color miptree. - * - * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)", - * beneath the "Fast Color Clear" bullet (p327): - * - * The following table describes the RT alignment - * - * Pixels Lines - * TiledY RT CL - * bpp - * 32 8 4 - * 64 4 4 - * 128 2 4 - * TiledX RT CL - * bpp - * 32 16 2 - * 64 8 2 - * 128 4 2 - * - * This alignment has the following uses: - * - * - For figuring out the size of the MCS buffer. Each 4k tile in the MCS - * buffer contains 128 blocks horizontally and 256 blocks vertically. - * - * - For figuring out alignment restrictions for a fast clear operation. Fast - * clear operations must always clear aligned multiples of 16 blocks - * horizontally and 32 blocks vertically. - * - * - For scaling down the coordinates sent through the render pipeline during - * a fast clear. X coordinates must be scaled down by 8 times the block - * width, and Y coordinates by 16 times the block height. - * - * - For scaling down the coordinates sent through the render pipeline during - * a "Render Target Resolve" operation. X coordinates must be scaled down - * by half the block width, and Y coordinates by half the block height. - */ -void -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt, - unsigned *width_px, unsigned *height) -{ - switch (mt->tiling) { - default: - unreachable("Non-MSRT MCS requires X or Y tiling"); - /* In release builds, fall through */ - case I915_TILING_Y: - *width_px = 32 / mt->cpp; - *height = 4; - break; - case I915_TILING_X: - *width_px = 64 / mt->cpp; - *height = 2; - } -} - bool intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw, unsigned tiling) @@ -1654,55 +1594,53 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, assert(!mt->disable_aux_buffers); assert(!mt->no_ccs); - /* The format of the MCS buffer is opaque to the driver; all that matters - * is that we get its size and pitch right. We'll pretend that the format - * is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled - * R32 buffer is 32 pixels across, we'll need to scale the width down by - * the block width and then a further factor of 4. Since an MCS tile - * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high, - * we'll need to scale the height down by the block height and then a - * further factor of 8. - */ - const mesa_format format = MESA_FORMAT_R_UINT32; - unsigned block_width_px; - unsigned block_height; - intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height); - unsigned width_divisor = block_width_px * 4; - unsigned height_divisor = block_height * 8; - - /* The Skylake MCS is twice as tall as the Broadwell MCS. - * - * In pre-Skylake, each bit in the MCS contained the state of 2 cachelines - * in the main surface. In Skylake, it's two bits. The extra bit - * doubles the MCS height, not width, because in Skylake the MCS is always - * Y-tiled. + struct isl_surf temp_main_surf; + struct isl_surf temp_ccs_surf; + + /* Create first an ISL presentation for the main color surface and let ISL + * calculate equivalent CCS surface against it. */ - if (brw->gen >= 9) - height_divisor /= 2; + intel_miptree_get_isl_surf(brw, mt, &temp_main_surf); + if (!isl_surf_get_ccs_surf(&brw->isl_dev, &temp_main_surf, &temp_ccs_surf)) + return false; + + assert(temp_ccs_surf.size && + (temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0)); + + struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1); + if (!buf) + return false; - unsigned mcs_width = - ALIGN(mt->logical_width0, width_divisor) / width_divisor; - unsigned mcs_height = - ALIGN(mt->logical_height0, height_divisor) / height_divisor; - assert(mt->logical_depth0 == 1); + buf->size = temp_ccs_surf.size; + buf->pitch = temp_ccs_surf.row_pitch; + buf->qpitch = isl_surf_get_array_pitch_sa_rows(&temp_ccs_surf); - uint32_t layout_flags = - (brw->gen >= 8) ? MIPTREE_LAYOUT_FORCE_HALIGN16 : 0; /* In case of compression mcs buffer needs to be initialised requiring the * buffer to be immediately mapped to cpu space for writing. Therefore do * not use the gpu access flag which can cause an unnecessary delay if the * backing pages happened to be just used by the GPU. */ - if (!is_lossless_compressed) - layout_flags |= MIPTREE_LAYOUT_ACCELERATED_UPLOAD; - - mt->mcs_buf = intel_mcs_miptree_buf_create(brw, mt, - format, - mcs_width, - mcs_height, - layout_flags); - if (!mt->mcs_buf) + const uint32_t alloc_flags = + is_lossless_compressed ? 0 : BO_ALLOC_FOR_RENDER; + uint32_t tiling = I915_TILING_Y; + unsigned long pitch; + + /* ISL has stricter set of alignment rules then the drm allocator. + * Therefore one can pass the ISL dimensions in terms of bytes instead of + * trying to recalculate based on different format block sizes. + */ + buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "ccs-miptree", + buf->pitch, buf->size / buf->pitch, + 1, &tiling, &pitch, alloc_flags); + if (buf->bo) { + assert(pitch == buf->pitch); + assert(tiling == I915_TILING_Y); + } else { + free(buf); return false; + } + + mt->mcs_buf = buf; /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are * used for lossless compression which requires similar initialisation diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 29d0e2fac6..297d45f751 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -658,10 +658,6 @@ struct intel_mipmap_tree GLuint refcount; }; -void -intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt, - unsigned *width_px, unsigned *height); - bool intel_miptree_is_lossless_compressed(const struct brw_context *brw, const struct intel_mipmap_tree *mt); |