diff options
-rw-r--r-- | src/gallium/auxiliary/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 332 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_mpeg12_decoder.h | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_vertex_buffers.c | 9 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_vertex_buffers.h | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_zscan.c | 492 | ||||
-rw-r--r-- | src/gallium/auxiliary/vl/vl_zscan.h | 110 |
7 files changed, 849 insertions, 111 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index d210a25510..aa1b255c0c 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -152,6 +152,7 @@ C_SOURCES = \ vl/vl_mpeg12_decoder.c \ vl/vl_compositor.c \ vl/vl_csc.c \ + vl/vl_zscan.c \ vl/vl_idct.c \ vl/vl_mc.c \ vl/vl_vertex_buffers.c \ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index f262c13e0f..3b1d26d3db 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -44,6 +44,14 @@ static const unsigned const_empty_block_mask_420[3][2][2] = { { { 0x01, 0x01 }, { 0x01, 0x01 } } }; +static const enum pipe_format const_zscan_source_formats[] = { + PIPE_FORMAT_R16_SNORM, + PIPE_FORMAT_R16_SSCALED +}; + +static const unsigned num_zscan_source_formats = + sizeof(const_zscan_source_formats) / sizeof(enum pipe_format); + static const enum pipe_format const_idct_source_formats[] = { PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SSCALED @@ -79,10 +87,8 @@ map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer) assert(ctx && buffer); - if (ctx->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) - sampler_views = buffer->idct_source->get_sampler_views(buffer->idct_source); - else - sampler_views = buffer->mc_source->get_sampler_views(buffer->mc_source); + sampler_views = buffer->zscan_source->get_sampler_views(buffer->zscan_source); + assert(sampler_views); for (i = 0; i < VL_MAX_PLANES; ++i) { @@ -112,21 +118,17 @@ upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane, unsigned x, unsigned y, short *block, bool intra, enum pipe_mpeg12_dct_type type) { - unsigned tex_pitch; short *texels; - - unsigned i; + unsigned idx; assert(buffer); assert(block); - vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type); + idx = vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type); - tex_pitch = buffer->tex_transfer[plane]->stride / sizeof(short); - texels = buffer->texels[plane] + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH; + texels = buffer->texels[plane] + idx * BLOCK_WIDTH * BLOCK_HEIGHT; - for (i = 0; i < BLOCK_HEIGHT; ++i) - memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short)); + memcpy(texels, block, BLOCK_WIDTH * BLOCK_HEIGHT * sizeof(short)); } static void @@ -178,6 +180,144 @@ unmap_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer) } } +static bool +init_zscan_buffer(struct vl_mpeg12_buffer *buffer) +{ + enum pipe_format formats[3]; + + struct pipe_sampler_view **source; + struct pipe_surface **destination; + + struct vl_mpeg12_decoder *dec; + + unsigned i; + + assert(buffer); + + dec = (struct vl_mpeg12_decoder*)buffer->base.decoder; + + formats[0] = formats[1] = formats[2] = dec->zscan_source_format; + buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe, + dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT, + dec->max_blocks / dec->blocks_per_line, + 1, PIPE_VIDEO_CHROMA_FORMAT_444, + formats, PIPE_USAGE_STATIC); + if (!buffer->zscan_source) + goto error_source; + + source = buffer->zscan_source->get_sampler_views(buffer->zscan_source); + if (!source) + goto error_sampler; + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + destination = buffer->idct_source->get_surfaces(buffer->idct_source); + else + destination = buffer->mc_source->get_surfaces(buffer->mc_source); + + if (!destination) + goto error_surface; + + for (i = 0; i < VL_MAX_PLANES; ++i) + if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c, + &buffer->zscan[i], source[i], destination[i])) + goto error_plane; + + return true; + +error_plane: + for (; i > 0; --i) + vl_zscan_cleanup_buffer(&buffer->zscan[i - 1]); + +error_surface: +error_sampler: + buffer->zscan_source->destroy(buffer->zscan_source); + +error_source: + return false; +} + +static void +cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer) +{ + unsigned i; + + assert(buffer); + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_cleanup_buffer(&buffer->zscan[i]); + buffer->zscan_source->destroy(buffer->zscan_source); +} + +static bool +init_idct_buffer(struct vl_mpeg12_buffer *buffer) +{ + enum pipe_format formats[3]; + + struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv; + struct pipe_surface **idct_surfaces; + + struct vl_mpeg12_decoder *dec; + + unsigned i; + + assert(buffer); + + dec = (struct vl_mpeg12_decoder*)buffer->base.decoder; + + formats[0] = formats[1] = formats[2] = dec->idct_source_format; + buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe, + dec->base.width / 4, dec->base.height, 1, + dec->base.chroma_format, + formats, PIPE_USAGE_STATIC); + if (!buffer->idct_source) + goto error_source; + + formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format; + buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe, + dec->base.width / dec->nr_of_idct_render_targets, + dec->base.height / 4, dec->nr_of_idct_render_targets, + dec->base.chroma_format, + formats, PIPE_USAGE_STATIC); + + if (!buffer->idct_intermediate) + goto error_intermediate; + + idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source); + if (!idct_source_sv) + goto error_source_sv; + + idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate); + if (!idct_intermediate_sv) + goto error_intermediate_sv; + + idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source); + if (!idct_surfaces) + goto error_surfaces; + + for (i = 0; i < 3; ++i) + if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c, + &buffer->idct[i], idct_source_sv[i], + idct_intermediate_sv[i], idct_surfaces[i])) + goto error_plane; + + return true; + +error_plane: + for (; i > 0; --i) + vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]); + +error_surfaces: +error_intermediate_sv: +error_source_sv: + buffer->idct_intermediate->destroy(buffer->idct_intermediate); + +error_intermediate: + buffer->idct_source->destroy(buffer->idct_source); + +error_source: + return false; +} + static void cleanup_idct_buffer(struct vl_mpeg12_buffer *buf) { @@ -187,11 +327,11 @@ cleanup_idct_buffer(struct vl_mpeg12_buffer *buf) dec = (struct vl_mpeg12_decoder*)buf->base.decoder; assert(dec); - buf->idct_source->destroy(buf->idct_source); - buf->idct_intermediate->destroy(buf->idct_intermediate); vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]); vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]); vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]); + buf->idct_source->destroy(buf->idct_source); + buf->idct_intermediate->destroy(buf->idct_intermediate); } static void @@ -206,6 +346,8 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer) dec = (struct vl_mpeg12_decoder*)buf->base.decoder; assert(dec); + cleanup_zscan_buffer(buf); + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) cleanup_idct_buffer(buf); @@ -310,6 +452,9 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) vl_idct_cleanup(&dec->idct_c); } + vl_zscan_cleanup(&dec->zscan_y); + vl_zscan_cleanup(&dec->zscan_c); + dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_ycbcr); dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv); @@ -319,76 +464,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) FREE(dec); } -static bool -init_idct_buffer(struct vl_mpeg12_buffer *buffer) -{ - enum pipe_format formats[3]; - - struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv; - struct pipe_surface **idct_surfaces; - - struct vl_mpeg12_decoder *dec; - - unsigned i; - - assert(buffer); - - dec = (struct vl_mpeg12_decoder*)buffer->base.decoder; - - formats[0] = formats[1] = formats[2] = dec->idct_source_format; - buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe, - dec->base.width / 4, dec->base.height, 1, - dec->base.chroma_format, - formats, PIPE_USAGE_STREAM); - if (!buffer->idct_source) - goto error_source; - - formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format; - buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe, - dec->base.width / dec->nr_of_idct_render_targets, - dec->base.height / 4, dec->nr_of_idct_render_targets, - dec->base.chroma_format, - formats, PIPE_USAGE_STATIC); - - if (!buffer->idct_intermediate) - goto error_intermediate; - - idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source); - if (!idct_source_sv) - goto error_source_sv; - - idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate); - if (!idct_intermediate_sv) - goto error_intermediate_sv; - - idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source); - if (!idct_surfaces) - goto error_surfaces; - - for (i = 0; i < 3; ++i) - if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c, - &buffer->idct[i], idct_source_sv[i], - idct_intermediate_sv[i], idct_surfaces[i])) - goto error_plane; - - return true; - -error_plane: - for (; i > 0; --i) - vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]); - -error_surfaces: -error_intermediate_sv: -error_source_sv: - buffer->idct_intermediate->destroy(buffer->idct_intermediate); - -error_intermediate: - buffer->idct_source->destroy(buffer->idct_source); - -error_source: - return false; -} - static struct pipe_video_decode_buffer * vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) { @@ -426,10 +501,6 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) if (!buffer->mc_source) goto error_mc_source; - if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) - if (!init_idct_buffer(buffer)) - goto error_idct; - mc_source_sv = buffer->mc_source->get_sampler_views(buffer->mc_source); if (!mc_source_sv) goto error_mc_source_sv; @@ -443,8 +514,18 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) if(!vl_mc_init_buffer(&dec->mc_c, &buffer->mc[2], mc_source_sv[2])) goto error_mc_cr; + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + if (!init_idct_buffer(buffer)) + goto error_idct; + + if (!init_zscan_buffer(buffer)) + goto error_zscan; + return &buffer->base; +error_zscan: + // TODO Cleanup error handling + error_mc_cr: vl_mc_cleanup_buffer(&buffer->mc[1]); @@ -517,6 +598,8 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i); dec->pipe->set_vertex_buffers(dec->pipe, 2, vb); + vl_zscan_render(&buf->zscan[i] , num_instances); + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_instances); @@ -590,9 +673,47 @@ find_first_supported_format(struct vl_mpeg12_decoder *dec, } static bool -init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height) +init_zscan(struct vl_mpeg12_decoder *dec) +{ + struct pipe_sampler_view *layout; + + unsigned num_channels; + + assert(dec); + + dec->blocks_per_line = 4; + dec->max_blocks = + (dec->base.width * dec->base.height) / + (BLOCK_WIDTH * BLOCK_HEIGHT); + + dec->zscan_source_format = find_first_supported_format(dec, const_zscan_source_formats, + num_zscan_source_formats, PIPE_TEXTURE_2D); + + if (dec->zscan_source_format == PIPE_FORMAT_NONE) + return false; + + layout = vl_zscan_linear(dec->pipe, dec->blocks_per_line); + + num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1; + + if (!vl_zscan_init(&dec->zscan_y, dec->pipe, dec->base.width, dec->base.height, + dec->blocks_per_line, dec->max_blocks, num_channels)) + return false; + + vl_zscan_set_layout(&dec->zscan_y, layout); + + if (!vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height, + dec->blocks_per_line, dec->max_blocks, num_channels)) + return false; + + vl_zscan_set_layout(&dec->zscan_c, layout); + + return true; +} + +static bool +init_idct(struct vl_mpeg12_decoder *dec) { - unsigned chroma_width, chroma_height; struct pipe_sampler_view *matrix, *transpose; float matrix_scale, transpose_scale; @@ -645,22 +766,11 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_ } else pipe_sampler_view_reference(&transpose, matrix); - if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height, + if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height, dec->nr_of_idct_render_targets, matrix, transpose)) goto error_y; - if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { - chroma_width = buffer_width / 2; - chroma_height = buffer_height / 2; - } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { - chroma_width = buffer_width; - chroma_height = buffer_height / 2; - } else { - chroma_width = buffer_width; - chroma_height = buffer_height; - } - - if(!vl_idct_init(&dec->idct_c, dec->pipe, chroma_width, chroma_height, + if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height, dec->nr_of_idct_render_targets, matrix, transpose)) goto error_c; @@ -736,8 +846,22 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context, if (dec->mc_source_format == PIPE_FORMAT_NONE) return NULL; + if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { + dec->chroma_width = dec->base.width / 2; + dec->chroma_height = dec->base.height / 2; + } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { + dec->chroma_width = dec->base.width; + dec->chroma_height = dec->base.height / 2; + } else { + dec->chroma_width = dec->base.width; + dec->chroma_height = dec->base.height; + } + + if (!init_zscan(dec)) + return NULL; // TODO error handling + if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { - if (!init_idct(dec, dec->base.width, dec->base.height)) + if (!init_idct(dec)) goto error_idct; if (dec->mc_source_format == PIPE_FORMAT_R16_SSCALED) mc_scale = SCALE_FACTOR_SSCALED; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h index c961e433b5..b94f12a9b7 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h @@ -30,6 +30,7 @@ #include <pipe/p_video_context.h> +#include "vl_zscan.h" #include "vl_idct.h" #include "vl_mc.h" @@ -44,9 +45,15 @@ struct vl_mpeg12_decoder struct pipe_video_decoder base; struct pipe_context *pipe; + unsigned chroma_width, chroma_height; + + unsigned blocks_per_line; + unsigned max_blocks; + const unsigned (*empty_block_mask)[3][2][2]; unsigned nr_of_idct_render_targets; + enum pipe_format zscan_source_format; enum pipe_format idct_source_format; enum pipe_format idct_intermediate_format; enum pipe_format mc_source_format; @@ -57,6 +64,7 @@ struct vl_mpeg12_decoder void *ves_ycbcr; void *ves_mv; + struct vl_zscan zscan_y, zscan_c; struct vl_idct idct_y, idct_c; struct vl_mc mc_y, mc_c; @@ -69,10 +77,12 @@ struct vl_mpeg12_buffer struct vl_vertex_buffer vertex_stream; + struct pipe_video_buffer *zscan_source; struct pipe_video_buffer *idct_source; struct pipe_video_buffer *idct_intermediate; struct pipe_video_buffer *mc_source; + struct vl_zscan_buffer zscan[VL_MAX_PLANES]; struct vl_idct_buffer idct[VL_MAX_PLANES]; struct vl_mc_buffer mc[VL_MAX_PLANES]; diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index e61425843f..d2025f76b8 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -301,9 +301,10 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) } -void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer, - unsigned component, unsigned x, unsigned y, - bool intra, enum pipe_mpeg12_dct_type type) +unsigned +vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer, + unsigned component, unsigned x, unsigned y, + bool intra, enum pipe_mpeg12_dct_type type) { struct vl_ycbcr_vertex_stream *stream; @@ -316,7 +317,7 @@ void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer, stream->intra = intra; stream->field = type == PIPE_MPEG12_DCT_TYPE_FIELD; - buffer->ycbcr[component].num_instances++; + return buffer->ycbcr[component].num_instances++; } unsigned diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h index 6a83111b4a..89d455225a 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h @@ -84,9 +84,9 @@ void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component); -void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer, - unsigned component, unsigned x, unsigned y, - bool intra, enum pipe_mpeg12_dct_type type); +unsigned vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer, + unsigned component, unsigned x, unsigned y, + bool intra, enum pipe_mpeg12_dct_type type); struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame); diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c new file mode 100644 index 0000000000..4d4d3fd6d9 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_zscan.c @@ -0,0 +1,492 @@ +/************************************************************************** + * + * Copyright 2011 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <assert.h> + +#include <pipe/p_screen.h> +#include <pipe/p_context.h> + +#include <util/u_draw.h> +#include <util/u_sampler.h> +#include <util/u_inlines.h> + +#include <tgsi/tgsi_ureg.h> + +#include <vl/vl_defines.h> +#include <vl/vl_types.h> + +#include "vl_zscan.h" +#include "vl_vertex_buffers.h" + +enum VS_OUTPUT +{ + VS_O_VPOS, + VS_O_VTEX +}; + +static void * +create_vert_shader(struct vl_zscan *zscan) +{ + struct ureg_program *shader; + + struct ureg_src scale, instance; + struct ureg_src vrect, vpos; + + struct ureg_dst tmp; + struct ureg_dst o_vpos, o_vtex[zscan->num_channels]; + + unsigned i; + + shader = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!shader) + return NULL; + + scale = ureg_imm2f(shader, + (float)BLOCK_WIDTH / zscan->buffer_width, + (float)BLOCK_HEIGHT / zscan->buffer_height); + + instance = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0); + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + + tmp = ureg_DECL_temporary(shader); + + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); + + for (i = 0; i < zscan->num_channels; ++i) + o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i); + + /* + * o_vpos.xy = (vpos + vrect) * scale + * o_vpos.zw = 1.0f + * + * tmp.xy = InstanceID / blocks_per_line + * tmp.x = frac(tmp.x) + * tmp.y = floor(tmp.y) + * + * o_vtex.x = vrect.x / blocks_per_line + tmp.x + * o_vtex.y = vrect.y + * o_vtex.z = tmp.z * blocks_per_line / blocks_total + */ + ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect); + ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); + + ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), instance, + ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); + + ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp)); + ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp)); + + for (i = 0; i < zscan->num_channels; ++i) { + if (i > 0) + ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp), + ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH))); + + ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, + ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); + ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); + ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp), + ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); + } + + ureg_release_temporary(shader, tmp); + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, zscan->pipe); +} + +static void * +create_frag_shader(struct vl_zscan *zscan) +{ + struct ureg_program *shader; + struct ureg_src vtex[zscan->num_channels]; + + struct ureg_src src, scan, quant; + + struct ureg_dst tmp[zscan->num_channels]; + struct ureg_dst fragment; + + unsigned i; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return NULL; + + for (i = 0; i < zscan->num_channels; ++i) + vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR); + + src = ureg_DECL_sampler(shader, 0); + scan = ureg_DECL_sampler(shader, 1); + quant = ureg_DECL_sampler(shader, 2); + + for (i = 0; i < zscan->num_channels; ++i) + tmp[i] = ureg_DECL_temporary(shader); + + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + /* + * tmp.x = tex(vtex, 1) + * tmp.y = vtex.z + * fragment = tex(tmp, 0) * quant + */ + for (i = 0; i < zscan->num_channels; ++i) + ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan); + + for (i = 0; i < zscan->num_channels; ++i) + ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z)); + + for (i = 0; i < zscan->num_channels; ++i) + ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src); + + // TODO: Fetch quant and use it + for (i = 0; i < zscan->num_channels; ++i) + ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f)); + + for (i = 0; i < zscan->num_channels; ++i) + ureg_release_temporary(shader, tmp[i]); + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, zscan->pipe); +} + +static bool +init_shaders(struct vl_zscan *zscan) +{ + assert(zscan); + + zscan->vs = create_vert_shader(zscan); + if (!zscan->vs) + goto error_vs; + + zscan->fs = create_frag_shader(zscan); + if (!zscan->fs) + goto error_fs; + + return true; + +error_fs: + zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); + +error_vs: + return false; +} + +static void +cleanup_shaders(struct vl_zscan *zscan) +{ + assert(zscan); + + zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); + zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs); +} + +static bool +init_state(struct vl_zscan *zscan) +{ + struct pipe_blend_state blend; + struct pipe_rasterizer_state rs_state; + struct pipe_sampler_state sampler; + unsigned i; + + assert(zscan); + + memset(&rs_state, 0, sizeof(rs_state)); + rs_state.gl_rasterization_rules = false; + zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state); + if (!zscan->rs_state) + goto error_rs_state; + + memset(&blend, 0, sizeof blend); + + blend.independent_blend_enable = 0; + blend.rt[0].blend_enable = 0; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.rt[0].colormask = PIPE_MASK_RGBA; + blend.dither = 0; + zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend); + if (!zscan->blend) + goto error_blend; + + for (i = 0; i < 3; ++i) { + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler); + if (!zscan->samplers[i]) + goto error_samplers; + } + + return true; + +error_samplers: + for (i = 0; i < 2; ++i) + if (zscan->samplers[i]) + zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); + + zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); + +error_blend: + zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); + +error_rs_state: + return false; +} + +static void +cleanup_state(struct vl_zscan *zscan) +{ + unsigned i; + + assert(zscan); + + for (i = 0; i < 3; ++i) + zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); + + zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); + zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); +} + +struct pipe_sampler_view * +vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line) +{ + const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT; + + struct pipe_resource res_tmpl, *res; + struct pipe_sampler_view sv_tmpl, *sv; + struct pipe_transfer *buf_transfer; + unsigned x, y, i, pitch; + float *f; + + struct pipe_box rect = + { + 0, 0, 0, + BLOCK_WIDTH * blocks_per_line, + BLOCK_HEIGHT, + 1 + }; + + assert(pipe && blocks_per_line); + + memset(&res_tmpl, 0, sizeof(res_tmpl)); + res_tmpl.target = PIPE_TEXTURE_2D; + res_tmpl.format = PIPE_FORMAT_R32_FLOAT; + res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line; + res_tmpl.height0 = BLOCK_HEIGHT; + res_tmpl.depth0 = 1; + res_tmpl.array_size = 1; + res_tmpl.usage = PIPE_USAGE_IMMUTABLE; + res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; + + res = pipe->screen->resource_create(pipe->screen, &res_tmpl); + if (!res) + goto error_resource; + + buf_transfer = pipe->get_transfer + ( + pipe, res, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); + if (!buf_transfer) + goto error_transfer; + + pitch = buf_transfer->stride / sizeof(float); + + f = pipe->transfer_map(pipe, buf_transfer); + if (!f) + goto error_map; + + for (i = 0; i < blocks_per_line; ++i) + for (y = 0; y < BLOCK_HEIGHT; ++y) + for (x = 0; x < BLOCK_WIDTH; ++x) { + float addr = x + y * BLOCK_WIDTH + + i * BLOCK_WIDTH * BLOCK_HEIGHT; + + addr /= total_size; + + f[i * BLOCK_WIDTH + y * pitch + x] = addr; + } + + pipe->transfer_unmap(pipe, buf_transfer); + pipe->transfer_destroy(pipe, buf_transfer); + + memset(&sv_tmpl, 0, sizeof(sv_tmpl)); + u_sampler_view_default_template(&sv_tmpl, res, res->format); + sv = pipe->create_sampler_view(pipe, res, &sv_tmpl); + pipe_resource_reference(&res, NULL); + if (!sv) + goto error_map; + + return sv; + +error_map: + pipe->transfer_destroy(pipe, buf_transfer); + +error_transfer: + pipe_resource_reference(&res, NULL); + +error_resource: + return NULL; +} + +#if 0 +// TODO +struct pipe_sampler_view * +vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line); + +struct pipe_sampler_view * +vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line); +#endif + +bool +vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned blocks_per_line, unsigned blocks_total, + unsigned num_channels) +{ + assert(zscan && pipe); + + zscan->pipe = pipe; + zscan->buffer_width = buffer_width; + zscan->buffer_height = buffer_height; + zscan->num_channels = num_channels; + zscan->blocks_per_line = blocks_per_line; + zscan->blocks_total = blocks_total; + + if(!init_shaders(zscan)) + return false; + + if(!init_state(zscan)) { + cleanup_shaders(zscan); + return false; + } + + return true; +} + +void +vl_zscan_cleanup(struct vl_zscan *zscan) +{ + assert(zscan); + + cleanup_shaders(zscan); + cleanup_state(zscan); +} + +void +vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout) +{ + assert(zscan); + assert(layout); + + pipe_sampler_view_reference(&zscan->scan, layout); +} + +#if 0 +// TODO +void +vl_zscan_upload_quant(struct vl_zscan *zscan, ...); +#endif + +bool +vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, + struct pipe_sampler_view *src, struct pipe_surface *dst) +{ + assert(zscan && buffer); + + memset(buffer, 0, sizeof(struct vl_zscan_buffer)); + + buffer->zscan = zscan; + + pipe_sampler_view_reference(&buffer->src, src); + pipe_sampler_view_reference(&buffer->scan, zscan->scan); + pipe_sampler_view_reference(&buffer->quant, zscan->quant); + + buffer->viewport.scale[0] = dst->width; + buffer->viewport.scale[1] = dst->height; + buffer->viewport.scale[2] = 1; + buffer->viewport.scale[3] = 1; + buffer->viewport.translate[0] = 0; + buffer->viewport.translate[1] = 0; + buffer->viewport.translate[2] = 0; + buffer->viewport.translate[3] = 0; + + buffer->fb_state.width = dst->width; + buffer->fb_state.height = dst->height; + buffer->fb_state.nr_cbufs = 1; + pipe_surface_reference(&buffer->fb_state.cbufs[0], dst); + + return true; +} + +void +vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer) +{ + assert(buffer); + + pipe_sampler_view_reference(&buffer->src, NULL); + pipe_sampler_view_reference(&buffer->scan, NULL); + pipe_sampler_view_reference(&buffer->quant, NULL); + pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL); +} + +void +vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances) +{ + struct vl_zscan *zscan; + + assert(buffer); + + zscan = buffer->zscan; + + zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state); + zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend); + zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers); + zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state); + zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport); + zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src); + zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs); + zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs); + util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); +} diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h new file mode 100644 index 0000000000..28b990ca83 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_zscan.h @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2011 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef vl_zscan_h +#define vl_zscan_h + +#include <pipe/p_compiler.h> +#include <pipe/p_state.h> + +/* + * shader based zscan and quantification + * expect usage of vl_vertex_buffers as a todo list + */ +struct vl_zscan +{ + struct pipe_context *pipe; + + unsigned buffer_width; + unsigned buffer_height; + + unsigned num_channels; + + unsigned blocks_per_line; + unsigned blocks_total; + + void *rs_state; + void *blend; + + void *samplers[3]; + + void *vs, *fs; + + struct pipe_sampler_view *scan; + struct pipe_sampler_view *quant; +}; + +struct vl_zscan_buffer +{ + struct vl_zscan *zscan; + + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state fb_state; + + struct pipe_sampler_view *src, *scan, *quant; + struct pipe_surface *dst; +}; + +struct pipe_sampler_view * +vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line); + +#if 0 +struct pipe_sampler_view * +vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line); + +struct pipe_sampler_view * +vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line); +#endif + +bool +vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned blocks_per_line, unsigned blocks_total, + unsigned num_channels); + +void +vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout); + +void +vl_zscan_cleanup(struct vl_zscan *zscan); + +#if 0 +void +vl_zscan_upload_quant(struct vl_zscan *zscan, ...); +#endif + +bool +vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, + struct pipe_sampler_view *src, struct pipe_surface *dst); + +void +vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer); + +void +vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances); + +#endif |