diff options
author | Keith Whitwell <keithw@vmware.com> | 2010-07-16 14:40:30 +0100 |
---|---|---|
committer | José Fonseca <jfonseca@vmware.com> | 2010-07-16 17:24:21 +0100 |
commit | 2f6d47a7c8d6e69e5154de44115aab9ba35a41d2 (patch) | |
tree | 8919f6686b6f7db8aa3d3305084076d8884afb15 | |
parent | b7fff13d58b57870807bae2f43fa2854b551b267 (diff) |
llvmpipe: use single swizzled tile
Use a single swizzled tile per colorbuf (and per thread) to avoid
accumulating large amounts of cached swizzled data.
Now that the SSE3 code has been merged to master, the performance delta
of this change is minimal, the main benefit is reduced memory usage
due to no longer keeping swizzled copies of render targets.
It's clear from the performance of the in-place version of this code
that there is still quite a bit of time being spent swizzling &
unswizzling, but it's not clear exactly how to reduce that.
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_memory.c | 41 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_memory.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 55 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_priv.h | 24 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_texture.c | 88 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_texture.h | 11 |
7 files changed, 148 insertions, 100 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c index 61d16668eb95..0f55d4a80ae7 100644 --- a/src/gallium/drivers/llvmpipe/lp_memory.c +++ b/src/gallium/drivers/llvmpipe/lp_memory.c @@ -29,32 +29,17 @@ #include "lp_limits.h" #include "lp_memory.h" - -/** 32bpp RGBA dummy tile to use in out of memory conditions */ -static PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; - -static unsigned lp_out_of_memory = 0; - - -uint8_t * -lp_get_dummy_tile(void) -{ - if (lp_out_of_memory++ < 10) { - debug_printf("llvmpipe: out of memory. Using dummy tile memory.\n"); - } - return lp_dummy_tile; -} - -uint8_t * -lp_get_dummy_tile_silent(void) -{ - return lp_dummy_tile; -} - - -boolean -lp_is_dummy_tile(void *tile) -{ - return tile == lp_dummy_tile; -} +/** + * 32bpp RGBA swizzled tiles. One for for each thread and each + * possible colorbuf. Adds up to quite a bit 8*8*64*64*4 == 1MB. + * Several schemes exist to reduce this, such as scaling back the + * number of threads or using a smaller tilesize when multiple + * colorbuffers are bound. + */ +PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4]; + + +/* A single dummy tile used in a couple of out-of-memory situations. + */ +PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h index 1d0e5ebdb695..f7418f5e0871 100644 --- a/src/gallium/drivers/llvmpipe/lp_memory.h +++ b/src/gallium/drivers/llvmpipe/lp_memory.h @@ -30,16 +30,11 @@ #include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "lp_limits.h" +extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4]; -extern uint8_t * -lp_get_dummy_tile(void); - -uint8_t * -lp_get_dummy_tile_silent(void); - -extern boolean -lp_is_dummy_tile(void *tile); - +extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; #endif /* LP_MEMORY_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a023d2b668ee..654f4ea48ebe 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -67,7 +67,7 @@ lp_rast_begin( struct lp_rasterizer *rast, cbuf->level, cbuf->zslice, LP_TEX_USAGE_READ_WRITE, - LP_TEX_LAYOUT_NONE); + LP_TEX_LAYOUT_LINEAR); } if (fb->zsbuf) { @@ -271,11 +271,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, dst = task->depth_tile; - if (lp_is_dummy_tile(dst)) - return; - - assert(dst == lp_rast_get_depth_block_pointer(task, task->x, task->y)); - switch (block_size) { case 1: memset(dst, (uint8_t) clear_value, height * width); @@ -375,10 +370,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task, struct pipe_surface *cbuf = scene->fb.cbufs[buf]; const unsigned face = cbuf->face, level = cbuf->level; struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); - /* this will convert the tiled data to linear if needed */ - (void) llvmpipe_get_texture_tile_linear(lpt, face, level, - LP_TEX_USAGE_READ, - task->x, task->y); + + if (!task->color_tiles[buf]) + continue; + + llvmpipe_unswizzle_cbuf_tile(lpt, + face, + level, + task->x, task->y, + task->color_tiles[buf]); } } @@ -589,6 +589,11 @@ lp_rast_tile_end(struct lp_rasterizer_task *task) (void) outline_subtiles; #endif + { + union lp_rast_cmd_arg dummy = {0}; + lp_rast_store_linear_color(task, dummy); + } + /* debug */ memset(task->color_tiles, 0, sizeof(task->color_tiles)); task->depth_tile = NULL; @@ -751,30 +756,8 @@ debug_bin( const struct cmd_bin *bin ) static boolean is_empty_bin( const struct cmd_bin *bin ) { - const struct cmd_block *head = bin->commands.head; - int i; - - if (0) - debug_bin(bin); - - /* We emit at most two load-tile commands at the start of the first - * command block. In addition we seem to emit a couple of - * set-state commands even in empty bins. - * - * As a heuristic, if a bin has more than 4 commands, consider it - * non-empty. - */ - if (head->next != NULL || - head->count > 4) { - return FALSE; - } - - for (i = 0; i < head->count; i++) - if (head->cmd[i] != lp_rast_store_linear_color) { - return FALSE; - } - - return TRUE; + if (0) debug_bin(bin); + return bin->commands.head->count == 0; } @@ -984,6 +967,10 @@ lp_rast_create( unsigned num_threads ) /* for synchronizing rasterization threads */ pipe_barrier_init( &rast->barrier, rast->num_threads ); + memset(lp_swizzled_cbuf, 0, sizeof lp_swizzled_cbuf); + + memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); + return rast; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 8044927c8be2..b4a48cfd0244 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -148,7 +148,7 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, * the oom warning as this most likely because there is no * zsbuf. */ - return lp_get_dummy_tile_silent(); + return lp_dummy_tile; } depth = (rast->zsbuf.map + @@ -178,15 +178,14 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task, struct llvmpipe_resource *lpt; assert(cbuf); lpt = llvmpipe_resource(cbuf->texture); - task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, - cbuf->face + cbuf->zslice, - cbuf->level, - usage, - task->x, - task->y); - if (!task->color_tiles[buf]) { - /* out of memory - use dummy tile memory */ - return lp_get_dummy_tile(); + task->color_tiles[buf] = lp_swizzled_cbuf[task->thread_index][buf]; + + if (usage != LP_TEX_USAGE_WRITE_ALL) { + llvmpipe_swizzle_cbuf_tile(lpt, + cbuf->face + cbuf->zslice, + cbuf->level, + task->x, task->y, + task->color_tiles[buf]); } } @@ -212,10 +211,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, assert((y % TILE_VECTOR_HEIGHT) == 0); color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE); - if (!color) { - /* out of memory - use dummy tile memory */ - return lp_get_dummy_tile(); - } + assert(color); px = x % TILE_SIZE; py = y % TILE_SIZE; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 7d48ad8e7417..556e571585dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -280,20 +280,6 @@ lp_setup_flush( struct lp_setup_context *setup, LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); if (setup->scene) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - union lp_rast_cmd_arg dummy = {0}; - - if (flags & (PIPE_FLUSH_SWAPBUFFERS | - PIPE_FLUSH_FRAME)) { - /* Store colors in the linear color buffer(s). - * If we don't do this here, we'll end up converting the tiled - * data to linear in the texture_unmap() function, which will - * not be a parallel/threaded operation as here. - */ - lp_scene_bin_everywhere(scene, lp_rast_store_linear_color, dummy); - } - - if (fence) { /* if we're going to flush the setup/rasterization modules, emit * a fence. @@ -642,7 +628,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, if (!jit_tex->data[j]) { /* out of memory - use dummy tile memory */ - jit_tex->data[j] = lp_get_dummy_tile(); + jit_tex->data[j] = lp_dummy_tile; jit_tex->width = TILE_SIZE; jit_tex->height = TILE_SIZE; jit_tex->depth = 1; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index d236bad69d18..bbd834519a3c 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -1209,6 +1209,94 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, /** + * Get pointer to tiled data for rendering. + * \return pointer to the tiled data at the given tile position + */ +void +llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y, + uint8_t *tile) +{ + struct llvmpipe_texture_image *linear_img = &lpr->linear[level]; + const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE; + uint8_t *linear_image; + + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + + if (!linear_img->data) { + /* allocate memory for the linear image now */ + alloc_image_data(lpr, level, LP_TEX_LAYOUT_LINEAR); + } + + /* compute address of the slice/face of the image that contains the tile */ + linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, + LP_TEX_LAYOUT_LINEAR); + + { + uint ii = x, jj = y; + uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE; + uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4; + + /* Note that lp_tiled_to_linear expects the tile parameter to + * point at the first tile in a whole-image sized array. In + * this code, we have only a single tile and have to do some + * pointer arithmetic to figure out where the "image" would have + * started. + */ + lp_tiled_to_linear(tile - byte_offset, linear_image, + x, y, TILE_SIZE, TILE_SIZE, + lpr->base.format, + lpr->row_stride[level], + 1); /* tiles per row */ + } + + llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, + LP_TEX_LAYOUT_LINEAR); +} + + +/** + * Get pointer to tiled data for rendering. + * \return pointer to the tiled data at the given tile position + */ +void +llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y, + uint8_t *tile) +{ + uint8_t *linear_image; + + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + + /* compute address of the slice/face of the image that contains the tile */ + linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level, + LP_TEX_LAYOUT_LINEAR); + + if (linear_image) { + uint ii = x, jj = y; + uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE; + uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4; + + /* Note that lp_linear_to_tiled expects the tile parameter to + * point at the first tile in a whole-image sized array. In + * this code, we have only a single tile and have to do some + * pointer arithmetic to figure out where the "image" would have + * started. + */ + lp_linear_to_tiled(linear_image, tile - byte_offset, + x, y, TILE_SIZE, TILE_SIZE, + lpr->base.format, + lpr->row_stride[level], + 1); /* tiles per row */ + } +} + + +/** * Return size of resource in bytes */ unsigned diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 503b6a19a8d5..4e4a65dcb401 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -223,6 +223,17 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr, unsigned x, unsigned y); +void +llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y, + uint8_t *tile); + +void +llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr, + unsigned face_slice, unsigned level, + unsigned x, unsigned y, + uint8_t *tile); extern void llvmpipe_print_resources(void); |