diff options
author | Xiang, Haihao <haihao.xiang@intel.com> | 2014-10-08 08:48:53 +0800 |
---|---|---|
committer | Xiang, Haihao <haihao.xiang@intel.com> | 2014-10-08 09:01:38 +0800 |
commit | 2b002e286c31c9b0c09c1f22237bb222ac77b97c (patch) | |
tree | d8ef471847df59fed762960e87cf6feedc8eb736 | |
parent | 4568e24f0382d2d03215bcaf78eb0eae68f88ae5 (diff) | |
parent | f11176415ec26eb5960ba6841d2d9c22f2cabc60 (diff) |
Merge remote-tracking branch 'fdo/master' into g45-h264
Some users still use this branch
Conflicts:
src/i965_avc_bsd.c
src/i965_avc_ildb.c
src/i965_drv_video.c
223 files changed, 47715 insertions, 4723 deletions
@@ -1,5 +1,65 @@ -libva-intel-driver NEWS -- summary of changes. 2013-06-26 -Copyright (C) 2009-2013 Intel Corporation +libva-intel-driver NEWS -- summary of changes. 2014-09-30 +Copyright (C) 2009-2014 Intel Corporation + +Version 1.4.0 - 30.Sep.2014 +* Add support for exporting VA buffer +* Add support for MVC decoding/encoding +* Add support for encoding quality level on Sandybride and newer +* Add support of inserting packed slice header & raw data for encoding +* Add support for Cherryview +* Fix the GPU hang issue on Ivybridge when using the gstreamer and mplayer to play back one H264 clip + (https://bugs.freedesktop.org/show_bug.cgi?id=80720) +* Fix the GPU hang issue on Sandybride and newer when playing back one H264 clip + (https://bugs.freedesktop.org/show_bug.cgi?id=76363) +* Fix the GPU hang issue on Haswell when using XBMC to play back one H264 clip + (https://bugs.freedesktop.org/show_bug.cgi?id=81447) + +Version 1.3.2 - 16.Jun.2014 +* Export JPEG format by vaDeriveImage() +* Add support for MADI on SNB +* H.264: fix the support for grayscale format (Y800) +* Fix vaGetConfigAttributes() to validate the profile/entrypoint pair +* Fix vaCreateConfig() to not override user chroma format +* Fix the scaling issue on IVB/HSW/BDW +* Fix https://bugs.freedesktop.org/show_bug.cgi?id=73424 +* Fix https://bugs.freedesktop.org/show_bug.cgi?id=72522 +* Fix https://bugs.freedesktop.org/show_bug.cgi?id=77041 +* Quality improvement for H.264 encoding on BDW + +Version 1.3.1 - 09.May.2014 +* Add support for STE on Broadwell +* Add support for YV16 +* Add support for user specified tiling and stride +* Fix VP8 decoding on Broadwell +* Fix the wrong alpha when convert NV12 into RGBA +* Fix https://bugs.freedesktop.org/show_bug.cgi?id=77386 + +Version 1.3.0 - 24.Mar.2014 +* Add support for Broadwell + - Decoding: H.264/MPEG-2/VC-1/JPEG/VP8 + - Encoding: H.264/MPEG-2 + - VPP: CSC/scaling/NoiseReduction/Deinterlacing{Bob, MotionAdaptive, 
MotionCompensated}/Sharpening/ColorBalance +* Fix the wrong setting in MI_BATCH_BATCH_START + +Version 1.2.2 - 16.Dec.2013 +* Motion compensation DI on HSW +* Optimization of FPS for H.264 encoding on HSW +* Add brightness/contrast/hue/saturation support for rendering. +* Support BT601/BT709/SMPTE240 in vaPutSurface() +* Expose Constrained Baseline Profile instead of Baseline Profile for H.264 +* Bug fixes + +Version 1.2.1 - 23.Sep.2013 +* Add PCI IDs for Bay Trail +* Performance improvement for MPEG-2 Encoding on IVB/HSW +* Add basic processing support for packed YUV to packed YUV on ILK+ +* Check the underlying OS support for VEBOX on HSW +* Quality improvement for BobDI on SNB/IVB +* Add support for Motion Adaptive Deinterlacing on IVB +* vaDeriveImage() works for UYVY formats +* Fix thread safety issue +* Fix GPU hang issue when decoding some videos on SNB +* Fix output filter count from QueryVideoProcFilters() Version 1.2.0 - 26.Jun.2013 * The new H.264 encoding API on SNB/IVB/HSW diff --git a/configure.ac b/configure.ac index e39f1d5..d2bbe47 100644 --- a/configure.ac +++ b/configure.ac @@ -1,6 +1,6 @@ # intel-driver package version number m4_define([intel_driver_major_version], [1]) -m4_define([intel_driver_minor_version], [2]) +m4_define([intel_driver_minor_version], [4]) m4_define([intel_driver_micro_version], [1]) m4_define([intel_driver_pre_version], [1]) m4_define([intel_driver_version], @@ -10,8 +10,8 @@ m4_append([intel_driver_version], intel_driver_pre_version, [.pre]) ]) # libva minimum version requirement -m4_define([va_api_version], [0.34]) -m4_define([libva_package_version], [1.2.0]) +m4_define([va_api_version], [0.36]) +m4_define([libva_package_version], [1.4.0]) # libdrm minimum version requirement m4_define([libdrm_version], [2.4.45]) @@ -76,10 +76,15 @@ PKG_CHECK_MODULES([DRM], [libdrm >= $LIBDRM_VERSION]) AC_SUBST(LIBDRM_VERSION) dnl Check for gen4asm -PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.3], [gen4asm=yes], [gen4asm=no]) 
+PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.5], [gen4asm=yes], [gen4asm=no]) AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes) AC_PATH_PROG([GEN4ASM], [intel-gen4asm]) +dnl Check for git +AC_ARG_VAR([GIT], [Path to git program, if any]) +AC_PATH_PROG([GIT], [git]) +AM_CONDITIONAL([HAVE_GIT], [test -n "$GIT"]) + dnl Check for VA-API PKG_CHECK_MODULES(LIBVA_DEPS, [libva >= va_api_version]) @@ -178,6 +183,8 @@ AC_OUTPUT([ src/shaders/post_processing/Makefile src/shaders/post_processing/gen5_6/Makefile src/shaders/post_processing/gen7/Makefile + src/shaders/post_processing/gen75/Makefile + src/shaders/post_processing/gen8/Makefile src/shaders/render/Makefile src/shaders/utils/Makefile src/shaders/vme/Makefile diff --git a/src/Makefile.am b/src/Makefile.am index 3299733..acfa849 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -20,7 +20,10 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -SUBDIRS = shaders +SUBDIRS = shaders +DIST_SUBDIRS = $(SUBDIRS) +EXTRA_DIST = +BUILT_SOURCES = AM_CPPFLAGS = \ -DPTHREADS \ @@ -56,6 +59,9 @@ source_c = \ gen7_mfd.c \ gen75_mfd.c \ gen75_mfc.c \ + gen8_mfc.c \ + gen8_mfd.c \ + gen8_vme.c \ gen75_picture_process.c \ gen75_vme.c \ gen75_vpp_gpe.c \ @@ -64,6 +70,7 @@ source_c = \ i965_avc_hw_scoreboard.c\ i965_avc_ildb.c \ i965_decoder_utils.c \ + i965_device_info.c \ i965_drv_video.c \ i965_encoder.c \ i965_encoder_utils.c \ @@ -72,7 +79,9 @@ source_c = \ i965_media_mpeg2.c \ i965_gpe_utils.c \ i965_post_processing.c \ + gen8_post_processing.c \ i965_render.c \ + gen8_render.c \ intel_batchbuffer.c \ intel_batchbuffer_dump.c\ intel_driver.c \ @@ -104,6 +113,7 @@ source_h = \ i965_media_mpeg2.h \ i965_mutext.h \ i965_gpe_utils.h \ + i965_pciids.h \ i965_post_processing.h \ i965_render.h \ i965_structs.h \ @@ -113,9 +123,11 @@ source_h = \ intel_driver.h \ intel_media.h \ intel_memman.h \ + intel_version.h \ object_heap.h \ sysdeps.h \ 
va_backend_compat.h \ + i965_fourcc.h \ $(NULL) i965_drv_video_la_LTLIBRARIES = i965_drv_video.la @@ -137,11 +149,43 @@ source_h += i965_output_wayland.h driver_cflags += $(WAYLAND_CFLAGS) endif +# git version +VERSION_FILE = .VERSION +OLD_VERSION_FILE = $(VERSION_FILE).old +NEW_VERSION_FILE = $(VERSION_FILE).new +PKG_VERSION_FILE = $(VERSION_FILE).pkg + +intel_version.h: gen-version + $(AM_V_GEN) \ + OV=`[ -f $(OLD_VERSION_FILE) ] && cat $(OLD_VERSION_FILE) || :`; \ + NV=`cat $(NEW_VERSION_FILE)`; \ + if [ "$$OV" != "$$NV" -o ! -f intel_version.h ]; then \ + cp -f $(NEW_VERSION_FILE) $(OLD_VERSION_FILE); \ + $(SED) -e "s|\@INTEL_DRIVER_GIT_VERSION\@|$${NV}|" \ + $(srcdir)/intel_version.h.in > intel_version.h; \ + fi + +gen-version: + @echo $(VERSION) > $(NEW_VERSION_FILE) +if HAVE_GIT + @[ -d $(top_srcdir)/.git ] && \ + (cd $(top_srcdir) && $(GIT) describe --tags) > $(NEW_VERSION_FILE) || : +endif + @[ -f $(srcdir)/$(PKG_VERSION_FILE) ] && \ + cp -f $(srcdir)/$(PKG_VERSION_FILE) $(NEW_VERSION_FILE) || : + +$(PKG_VERSION_FILE): $(NEW_VERSION_FILE) + @cp -f $< $@ + +BUILT_SOURCES += intel_version.h +EXTRA_DIST += intel_version.h.in $(PKG_VERSION_FILE) + # Wayland protocol +protocol_source_h = wayland-drm-client-protocol.h i965_output_wayland.c: $(protocol_source_h) @wayland_scanner_rules@ -DIST_SUBDIRS = $(SUBDIRS) wayland +DIST_SUBDIRS += wayland # Extra clean files so that maintainer-clean removes *everything* MAINTAINERCLEANFILES = Makefile.in config.h.in diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 883a42b..455721f 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -42,6 +42,10 @@ #include "gen6_vme.h" #include "intel_media.h" +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) + static const uint32_t 
gen6_mfc_batchbuffer_avc_intra[][4] = { #include "shaders/utils/mfc_batchbuffer_avc_intra.g6b" }; @@ -360,16 +364,29 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx, int weighted_pred_idc = 0; unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom; unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom; - int bslice = 0; + int num_ref_l0 = 0, num_ref_l1 = 0; if (batch == NULL) batch = encoder_context->base.batch; - if (slice_type == SLICE_TYPE_P) { + if (slice_type == SLICE_TYPE_I) { + luma_log2_weight_denom = 0; + chroma_log2_weight_denom = 0; + } else if (slice_type == SLICE_TYPE_P) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; } else if (slice_type == SLICE_TYPE_B) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; - bslice = 1; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) { + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + } if (weighted_pred_idc == 2) { /* 8.4.3 - Derivation process for prediction weights (8-279) */ @@ -394,14 +411,11 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) ); OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/ - if (slice_type == SLICE_TYPE_I) { - OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/ - } else { - OUT_BCS_BATCH(batch, - (1 << 16) | (bslice << 24) | /*1 reference frame*/ - (chroma_log2_weight_denom << 8) | - (luma_log2_weight_denom << 0)); - } + OUT_BCS_BATCH(batch, + (num_ref_l0 << 16) | + (num_ref_l1 << 24) | + (chroma_log2_weight_denom << 8) | + (luma_log2_weight_denom << 0)); 
OUT_BCS_BATCH(batch, (weighted_pred_idc << 30) | @@ -516,9 +530,25 @@ gen6_mfc_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; dri_bo *bo; int i; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int width_in_mbs = 0; + int height_in_mbs = 0; + int slice_batchbuffer_size; + + if (encoder_context->codec == CODEC_H264) { + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + width_in_mbs = pSequenceParameter->picture_width_in_mbs; + height_in_mbs = pSequenceParameter->picture_height_in_mbs; + } else { + VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + + assert(encoder_context->codec == CODEC_MPEG2); + + width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16; + height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; + } + + slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + + (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext; /*Encode common setup for MFC*/ dri_bo_unreference(mfc_context->post_deblocking_output.bo); @@ -586,7 +616,8 @@ gen6_mfc_init(VADriverContextP ctx, if (mfc_context->aux_batchbuffer) intel_batchbuffer_free(mfc_context->aux_batchbuffer); - mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0); + mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, + slice_batchbuffer_size); mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); mfc_context->aux_batchbuffer_surface.pitch = 16; @@ -597,8 +628,8 @@ gen6_mfc_init(VADriverContextP ctx, } static void 
gen6_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -696,6 +727,7 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, struct intel_batchbuffer *batch) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; int len_in_dwords = 11; if (batch == NULL) @@ -725,8 +757,8 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in /*Stuff for Inter MB*/ OUT_BCS_BATCH(batch, msg[1]); - OUT_BCS_BATCH(batch, 0x0); - OUT_BCS_BATCH(batch, 0x0); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]); /*MaxSizeInWord and TargetSzieInWord*/ OUT_BCS_BATCH(batch, (max_mb_size << 24) | @@ -756,15 +788,18 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int i,x,y; int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) { + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -777,18 +812,12 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, 
pPicParameter, pSliceParameter, encode_state, encoder_context, - (rate_control_mode == VA_RC_CBR), qp, slice_batch); + (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch); if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, encoder_context, - (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, 0, 1, slice_batch); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); dri_bo_map(vme_context->vme_output.bo , 1); msg = (unsigned int *)vme_context->vme_output.bo->virtual; @@ -835,7 +864,6 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, 1, 1, 1, 0, slice_batch); } - free(slice_header); } @@ -845,10 +873,14 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0); - dri_bo *batch_bo = batch->buffer; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct intel_batchbuffer *batch;; + dri_bo *batch_bo; int i; + batch = mfc_context->aux_batchbuffer; + batch_bo = batch->buffer; + for (i = 0; i < encode_state->num_slice_params_ext; i++) { gen6_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch); } @@ -861,7 +893,9 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } @@ -992,11 +1026,12 @@ gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, int 
mb_x, int mb_y, int width_in_mbs, - int qp) + int qp, + unsigned int ref_index[2]) { - BEGIN_BATCH(batch, 12); + BEGIN_BATCH(batch, 14); - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2)); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2)); OUT_BATCH(batch, index); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -1020,6 +1055,8 @@ gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, OUT_BATCH(batch, qp << 16 | width_in_mbs); + OUT_BATCH(batch, ref_index[0]); + OUT_BATCH(batch, ref_index[1]); ADVANCE_BATCH(batch); } @@ -1037,6 +1074,7 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_vme_context *vme_context = encoder_context->vme_context; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int total_mbs = slice_param->num_macroblocks; int number_mb_cmds = 128; @@ -1068,7 +1106,8 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, mb_x, mb_y, width_in_mbs, - qp); + qp, + vme_context->ref_index_in_mb); if (first_object) { head_offset += head_size; @@ -1106,7 +1145,8 @@ gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, mb_x, mb_y, width_in_mbs, - qp); + qp, + vme_context->ref_index_in_mb); } } @@ -1130,17 +1170,21 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; long head_offset; int old_used = intel_batchbuffer_used_size(slice_batch), used; unsigned short head_size, tail_size; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + int qp_slice; + 
qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) { + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + /* Use the adjusted qp when slice_header is generated by driver */ + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1156,26 +1200,13 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, encode_state, encoder_context, (rate_control_mode == VA_RC_CBR), - qp, + qp_slice, slice_batch); if (slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, - encoder_context, - (unsigned int *)slice_header, - ALIGN(slice_header_length_in_bits, 32) >> 5, - slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, - 0, - 1, - slice_batch); - free(slice_header); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ used = intel_batchbuffer_used_size(slice_batch); @@ -1360,7 +1391,7 @@ gen6_mfc_pipeline(VADriverContextP ctx, VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context); @@ -1436,6 +1467,9 @@ Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e { struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context)); + if (!mfc_context) + return False; + mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + 
sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS; diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index e6b04a1..67c62a4 100644 --- a/src/gen6_mfc.h +++ b/src/gen6_mfc.h @@ -42,19 +42,14 @@ struct encode_state; #define INTRA_MB_FLAG_MASK 0x00002000 -#define __SOFTWARE__ 0 - -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) +/* The space required for slice header SLICE_STATE + header. + * Is it enough? */ +#define SLICE_HEADER 80 -#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) +/* the space required for slice tail. 
*/ +#define SLICE_TAIL 16 -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) -#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) -#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) +#define __SOFTWARE__ 0 #define MFC_BATCHBUFFER_AVC_INTRA 0 #define MFC_BATCHBUFFER_AVC_INTER 1 @@ -164,6 +159,12 @@ struct gen6_mfc_context int target_frame_size[3]; // I,P,B double bits_per_frame; double qpf_rounding_accumulator; + + double saved_bps; + double saved_fps; + int saved_intra_period; + int saved_ip_period; + int saved_idr_period; } brc; struct { @@ -200,10 +201,10 @@ struct gen6_mfc_context void (*set_surface_state)(VADriverContextP ctx, struct intel_encoder_context *encoder_context); void (*ind_obj_base_addr_state)(VADriverContextP ctx, - struct intel_encoder_context *encoder_context); + struct intel_encoder_context *encoder_context); void (*avc_img_state)(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); void (*avc_qm_state)(VADriverContextP ctx, struct intel_encoder_context *encoder_context); void (*avc_fqm_state)(VADriverContextP ctx, @@ -234,38 +235,47 @@ Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context * extern int intel_mfc_update_hrd(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context, - int frame_bits); + struct gen6_mfc_context *mfc_context, + int frame_bits); extern int intel_mfc_brc_postpack(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context, - int frame_bits); + struct gen6_mfc_context *mfc_context, + int frame_bits); extern void intel_mfc_hrd_context_update(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context); + struct gen6_mfc_context *mfc_context); extern int 
intel_mfc_interlace_check(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); extern void intel_mfc_brc_prepare(struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct intel_encoder_context *encoder_context); extern void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - struct intel_batchbuffer *slice_batch); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch); extern VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); extern int intel_avc_enc_slice_type_fixup(int type); - extern void intel_mfc_avc_ref_idx_state(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); + +extern +Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); + +extern void +intel_avc_slice_insert_packed_data(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + struct intel_batchbuffer *slice_batch); #endif /* _GEN6_MFC_BCS_H_ */ diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index ab91c86..77c46dd 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -43,10 +43,10 @@ #include "gen6_vme.h" #include "intel_media.h" -#define BRC_CLIP(x, min, max) \ -{ \ - x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \ -} +#define BRC_CLIP(x, min, max) \ + { \ + x = ((x > (max)) ? (max) : ((x < (min)) ? 
(min) : x)); \ + } #define BRC_P_B_QP_DIFF 4 #define BRC_I_P_QP_DIFF 2 @@ -86,11 +86,11 @@ int intel_avc_enc_slice_type_fixup(int slice_type) static void intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context) + struct gen6_mfc_context *mfc_context) { VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; - int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ; int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs; int intra_mb_size = inter_mb_size * 5.0; @@ -130,7 +130,7 @@ intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, } static void intel_mfc_brc_init(struct encode_state *encode_state, - struct intel_encoder_context* encoder_context) + struct intel_encoder_context* encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; @@ -166,7 +166,7 @@ static void intel_mfc_brc_init(struct encode_state *encode_state, mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size; mfc_context->hrd.current_buffer_fullness = (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)? 
- pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.; + pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.; mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.; mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size; mfc_context->hrd.violation_noted = 0; @@ -188,8 +188,8 @@ static void intel_mfc_brc_init(struct encode_state *encode_state, } int intel_mfc_update_hrd(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context, - int frame_bits) + struct gen6_mfc_context *mfc_context, + int frame_bits) { double prev_bf = mfc_context->hrd.current_buffer_fullness; @@ -213,8 +213,8 @@ int intel_mfc_update_hrd(struct encode_state *encode_state, } int intel_mfc_brc_postpack(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context, - int frame_bits) + struct gen6_mfc_context *mfc_context, + int frame_bits) { gen6_brc_status sts = BRC_NO_HRD_VIOLATION; VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; @@ -243,7 +243,7 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state, frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype]; if (frame_size_alpha > 30) frame_size_alpha = 30; frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) / - (double)(frame_size_alpha + 1.); + (double)(frame_size_alpha + 1.); /* frame_size_next: avoiding negative number and too small value */ if ((double)frame_size_next < (double)(target_frame_size * 0.25)) @@ -333,7 +333,7 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state, } static void intel_mfc_hrd_context_init(struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncSequenceParameterBufferH264 *pSequenceParameter = 
(VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; @@ -357,14 +357,14 @@ static void intel_mfc_hrd_context_init(struct encode_state *encode_state, void intel_mfc_hrd_context_update(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context) + struct gen6_mfc_context *mfc_context) { mfc_context->vui_hrd.i_frame_number++; } int intel_mfc_interlace_check(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncSliceParameterBufferH264 *pSliceParameter; @@ -384,33 +384,136 @@ int intel_mfc_interlace_check(VADriverContextP ctx, return 1; } +/* + * Check whether the parameters related with CBR are updated and decide whether + * it needs to reinitialize the configuration related with CBR. + * Currently it will check the following parameters: + * bits_per_second + * frame_rate + * gop_configuration(intra_period, ip_period, intra_idr_period) + */ +static bool intel_mfc_brc_updated_check(struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + unsigned int rate_control_mode = encoder_context->rate_control_mode; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + double cur_fps, cur_bitrate; + VAEncSequenceParameterBufferH264 *pSequenceParameter; + + + if (rate_control_mode != VA_RC_CBR) { + return false; + } + + pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + + cur_bitrate = pSequenceParameter->bits_per_second; + cur_fps = (double)pSequenceParameter->time_scale / + (2 * (double)pSequenceParameter->num_units_in_tick); + + if ((cur_bitrate == mfc_context->brc.saved_bps) && + (cur_fps == mfc_context->brc.saved_fps) && + (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) && + 
(pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) && + (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) { + /* the parameters related with CBR are not updaetd */ + return false; + } + + mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period; + mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period; + mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period; + mfc_context->brc.saved_fps = cur_fps; + mfc_context->brc.saved_bps = cur_bitrate; + return true; +} + void intel_mfc_brc_prepare(struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { unsigned int rate_control_mode = encoder_context->rate_control_mode; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; if (rate_control_mode == VA_RC_CBR) { + bool brc_updated; + assert(encoder_context->codec != CODEC_MPEG2); + + brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context); + /*Programing bit rate control */ - if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) { + if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) || + brc_updated) { intel_mfc_bit_rate_control_context_init(encode_state, mfc_context); intel_mfc_brc_init(encode_state, encoder_context); } /*Programing HRD control */ - if ( mfc_context->vui_hrd.i_cpb_size_value == 0 ) + if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated ) intel_mfc_hrd_context_init(encode_state, encoder_context); } } +static int intel_avc_find_skipemulcnt(unsigned char *buf, int bits_length) +{ + int i, found; + int leading_zero_cnt, byte_length, zero_byte; + int nal_unit_type; + int skip_cnt = 0; + +#define NAL_UNIT_TYPE_MASK 0x1f +#define HW_MAX_SKIP_LENGTH 15 + + byte_length = ALIGN(bits_length, 32) >> 3; + + + leading_zero_cnt = 0; + found = 0; + for(i = 0; i < byte_length - 4; i++) { + if (((buf[i] == 0) && 
(buf[i + 1] == 0) && (buf[i + 2] == 1)) || + ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) { + found = 1; + break; + } + leading_zero_cnt++; + } + if (!found) { + /* A warning is emitted, but the packed data will be inserted anyway. */ + WARN_ONCE("Invalid packed header data. " + "Can't find the 000001 start_prefix code\n"); + return 0; + } + i = leading_zero_cnt; + + zero_byte = 0; + if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1))) + zero_byte = 1; + + skip_cnt = leading_zero_cnt + zero_byte + 3; + + /* the unit header byte is accounted */ + nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK; + skip_cnt += 1; + + if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) { + /* more unit header bytes are accounted for MVC/SVC */ + skip_cnt += 3; + } + if (skip_cnt > HW_MAX_SKIP_LENGTH) { + WARN_ONCE("Too many leading zeros are padded for packed data. " + "It is beyond the HW range.!!!\n"); + } + return skip_cnt; +} + void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS); unsigned int rate_control_mode = encoder_context->rate_control_mode; + unsigned int skip_emul_byte_cnt; if (encode_state->packed_header_data[idx]) { VAEncPackedHeaderParameterBuffer *param = NULL; @@ -421,12 +524,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; length_in_bits = param->bit_length; + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); mfc_context->insert_object(ctx, 
encoder_context, header_data, ALIGN(length_in_bits, 32) >> 5, length_in_bits & 0x1f, - 5, /* FIXME: check it */ + skip_emul_byte_cnt, 0, 0, !param->has_emulation_bytes, @@ -444,12 +548,14 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; length_in_bits = param->bit_length; + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); + mfc_context->insert_object(ctx, encoder_context, header_data, ALIGN(length_in_bits, 32) >> 5, length_in_bits & 0x1f, - 5, /* FIXME: check it */ + skip_emul_byte_cnt, 0, 0, !param->has_emulation_bytes, @@ -467,12 +573,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; length_in_bits = param->bit_length; + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); mfc_context->insert_object(ctx, encoder_context, header_data, ALIGN(length_in_bits, 32) >> 5, length_in_bits & 0x1f, - 5, /* FIXME: check it */ + skip_emul_byte_cnt, 0, 0, !param->has_emulation_bytes, @@ -484,13 +591,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, unsigned char *sei_data = NULL; int length_in_bits = build_avc_sei_buffer_timing( - mfc_context->vui_hrd.i_initial_cpb_removal_delay_length, - mfc_context->vui_hrd.i_initial_cpb_removal_delay, - 0, - mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number, - mfc_context->vui_hrd.i_dpb_output_delay_length, - 0, - &sei_data); + mfc_context->vui_hrd.i_initial_cpb_removal_delay_length, + mfc_context->vui_hrd.i_initial_cpb_removal_delay, + 0, + mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number, + mfc_context->vui_hrd.i_dpb_output_delay_length, + 0, + 
&sei_data); mfc_context->insert_object(ctx, encoder_context, (unsigned int *)sei_data, @@ -506,8 +613,8 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, } VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -523,7 +630,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, int width_in_mbs = pSequenceParameter->picture_width_in_mbs; int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - if (IS_GEN6(i965->intel.device_id)) { + if (IS_GEN6(i965->intel.device_info)) { /* On the SNB it should be fixed to 128 for the DMV buffer */ width_in_mbs = 128; } @@ -552,7 +659,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, /* Setup current frame and current direct mv buffer*/ obj_surface = encode_state->reconstructed_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); if ( obj_surface->private_data == NULL) { gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1); @@ -642,7 +749,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODED_H264; + coded_buffer_segment->codec = encoder_context->codec; dri_bo_unmap(bo); return vaStatus; @@ -662,44 +769,44 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, */ int intel_format_lutvalue(int value, int max) { - int ret; - int logvalue, temp1, temp2; + int ret; + int logvalue, temp1, temp2; - if (value <= 0) - return 0; + if (value <= 0) + return 0; - logvalue = (int)(log2f((float)value)); - if 
(logvalue < 4) { - ret = value; - } else { - int error, temp_value, base, j, temp_err; - error = value; - j = logvalue - 4 + 1; - ret = -1; - for(; j <= logvalue; j++) { - if (j == 0) { - base = value >> j; - } else { - base = (value + (1 << (j - 1)) - 1) >> j; - } - if (base >= 16) - continue; - - temp_value = base << j; - temp_err = abs(value - temp_value); - if (temp_err < error) { - error = temp_err; - ret = (j << 4) | base; - if (temp_err == 0) - break; - } - } - } - temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4); - temp2 = (max & 0xf) << ((max & 0xf0) >> 4); - if (temp1 > temp2) - ret = max; - return ret; + logvalue = (int)(log2f((float)value)); + if (logvalue < 4) { + ret = value; + } else { + int error, temp_value, base, j, temp_err; + error = value; + j = logvalue - 4 + 1; + ret = -1; + for(; j <= logvalue; j++) { + if (j == 0) { + base = value >> j; + } else { + base = (value + (1 << (j - 1)) - 1) >> j; + } + if (base >= 16) + continue; + + temp_value = base << j; + temp_err = abs(value - temp_value); + if (temp_err < error) { + error = temp_err; + ret = (j << 4) | base; + if (temp_err == 0) + break; + } + } + } + temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4); + temp2 = (max & 0xf) << ((max & 0xf0) >> 4); + if (temp1 > temp2) + ret = max; + return ret; } @@ -709,19 +816,19 @@ int intel_format_lutvalue(int value, int max) static float intel_lambda_qp(int qp) { - float value, lambdaf; - value = qp; - value = value / 6 - 2; - if (value < 0) - value = 0; - lambdaf = roundf(powf(2, value)); - return lambdaf; + float value, lambdaf; + value = qp; + value = value / 6 - 2; + if (value < 0) + value = 0; + lambdaf = roundf(powf(2, value)); + return lambdaf; } void intel_vme_update_mbmv_cost(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct 
gen6_vme_context *vme_context = encoder_context->vme_context; @@ -756,30 +863,30 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx, m_cost = 0; vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f); for (j = 1; j < 3; j++) { - m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; - m_cost = (int)m_costf; - vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); } mv_count = 3; for (j = 4; j <= 64; j *= 2) { - m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; - m_cost = (int)m_costf; - vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f); - mv_count++; + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f); + mv_count++; } if (qp <= 25) { - vme_state_message[MODE_INTRA_16X16] = 0x4a; - vme_state_message[MODE_INTRA_8X8] = 0x4a; - vme_state_message[MODE_INTRA_4X4] = 0x4a; - vme_state_message[MODE_INTRA_NONPRED] = 0x4a; - vme_state_message[MODE_INTER_16X16] = 0x4a; - vme_state_message[MODE_INTER_16X8] = 0x4a; - vme_state_message[MODE_INTER_8X8] = 0x4a; - vme_state_message[MODE_INTER_8X4] = 0x4a; - vme_state_message[MODE_INTER_4X4] = 0x4a; - vme_state_message[MODE_INTER_BWD] = 0x2a; - return; + vme_state_message[MODE_INTRA_16X16] = 0x4a; + vme_state_message[MODE_INTRA_8X8] = 0x4a; + vme_state_message[MODE_INTRA_4X4] = 0x4a; + vme_state_message[MODE_INTRA_NONPRED] = 0x4a; + vme_state_message[MODE_INTER_16X16] = 0x4a; + vme_state_message[MODE_INTER_16X8] = 0x4a; + vme_state_message[MODE_INTER_8X8] = 0x4a; + vme_state_message[MODE_INTER_8X4] = 0x4a; + vme_state_message[MODE_INTER_4X4] = 0x4a; + vme_state_message[MODE_INTER_BWD] = 0x2a; + return; } m_costf = lambda * 10; vme_state_message[MODE_INTRA_16X16] = 
intel_format_lutvalue(m_cost, 0x8f); @@ -791,42 +898,42 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx, m_cost = m_costf; vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f); if (slice_type == SLICE_TYPE_P) { - m_costf = lambda * 2.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); - m_costf = lambda * 4; - m_cost = m_costf; - vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); - m_costf = lambda * 1.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 3; - m_cost = m_costf; - vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 5; - m_cost = m_costf; - vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); - /* BWD is not used in P-frame */ - vme_state_message[MODE_INTER_BWD] = 0; + m_costf = lambda * 2.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); + m_costf = lambda * 4; + m_cost = m_costf; + vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); + m_costf = lambda * 1.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 3; + m_cost = m_costf; + vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 5; + m_cost = m_costf; + vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); + /* BWD is not used in P-frame */ + vme_state_message[MODE_INTER_BWD] = 0; } else { - m_costf = lambda * 2.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); - m_costf = lambda * 5.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); - m_costf = lambda * 3.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 5.0; 
- m_cost = m_costf; - vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 6.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); - m_costf = lambda * 1.5; - m_cost = m_costf; - vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 2.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); + m_costf = lambda * 5.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); + m_costf = lambda * 3.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 5.0; + m_cost = m_costf; + vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 6.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 1.5; + m_cost = m_costf; + vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); } } } @@ -841,8 +948,8 @@ gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_cont vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1; vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING; vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A | - MB_SCOREBOARD_B | - MB_SCOREBOARD_C); + MB_SCOREBOARD_B | + MB_SCOREBOARD_C); /* In VME prediction the current mb depends on the neighbour * A/B/C macroblock. 
So the left/up/up-right dependency should @@ -862,25 +969,25 @@ gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_cont /* check whether the mb of (x_index, y_index) is out of bound */ static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height) { - int mb_index; - if (x_index < 0 || x_index >= mb_width) - return -1; - if (y_index < 0 || y_index >= mb_height) - return -1; + int mb_index; + if (x_index < 0 || x_index >= mb_width) + return -1; + if (y_index < 0 || y_index >= mb_height) + return -1; - mb_index = y_index * mb_width + x_index; - if (mb_index < first_mb || mb_index > (first_mb + num_mb)) - return -1; - return 0; + mb_index = y_index * mb_width + x_index; + if (mb_index < first_mb || mb_index > (first_mb + num_mb)) + return -1; + return 0; } void gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - int transform_8x8_mode_flag, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; int mb_row; @@ -922,7 +1029,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, if (x_inner != (mb_width -1)) { mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; score_dep |= MB_SCOREBOARD_C; - } + } } *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); @@ -944,7 +1051,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, xtemp_outer = mb_width - 2; if (xtemp_outer < 0) - xtemp_outer = 0; + xtemp_outer = 0; x_outer = xtemp_outer; y_outer = first_mb / mb_width; for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { @@ -966,7 +1073,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, if (x_inner != (mb_width -1)) { mb_intra_ub |= 
INTRA_PRED_AVAIL_FLAG_C; score_dep |= MB_SCOREBOARD_C; - } + } } *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); @@ -1001,113 +1108,513 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, static uint8_t intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id) { - unsigned int is_long_term = - !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE); - unsigned int is_top_field = - !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD); - unsigned int is_bottom_field = - !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD); - - return ((is_long_term << 6) | - ((is_top_field ^ is_bottom_field ^ 1) << 5) | - (frame_store_id << 1) | - ((is_top_field ^ 1) & is_bottom_field)); + unsigned int is_long_term = + !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE); + unsigned int is_top_field = + !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD); + unsigned int is_bottom_field = + !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD); + + return ((is_long_term << 6) | + ((is_top_field ^ is_bottom_field ^ 1) << 5) | + (frame_store_id << 1) | + ((is_top_field ^ 1) & is_bottom_field)); } void intel_mfc_avc_ref_idx_state(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct i965_driver_data *i965 = i965_driver_data(ctx); - int slice_type; - struct object_surface *slice_obj_surface, *obj_surface; - int ref_surface_id; - unsigned int fref_entry, bref_entry; - int frame_index, i; - VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - - fref_entry = 0x80808080; - bref_entry = 0x80808080; - slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); - - if 
(slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - frame_index = -1; - for (i = 0; i < 16; i++) { - if (obj_surface == encode_state->reference_objects[i]) { - frame_index = i; - break; - } + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct intel_batchbuffer *batch = encoder_context->base.batch; + int slice_type; + struct object_surface *obj_surface; + unsigned int fref_entry, bref_entry; + int frame_index, i; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + + fref_entry = 0x80808080; + bref_entry = 0x80808080; + slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { + int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff); + + if (ref_idx_l0 > 3) { + WARN_ONCE("ref_idx_l0 is out of range\n"); + ref_idx_l0 = 0; + } + + obj_surface = vme_context->used_reference_objects[0]; + frame_index = -1; + for (i = 0; i < 16; i++) { + if (obj_surface && + obj_surface == encode_state->reference_objects[i]) { + frame_index = i; + break; + } + } + if (frame_index == -1) { + WARN_ONCE("RefPicList0 is not found in DPB!\n"); + } else { + int ref_idx_l0_shift = ref_idx_l0 * 8; + fref_entry &= ~(0xFF << ref_idx_l0_shift); + fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift); + } + } + + if (slice_type == SLICE_TYPE_B) { + int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff); + + if (ref_idx_l1 > 3) { + WARN_ONCE("ref_idx_l1 is out of range\n"); + ref_idx_l1 = 0; + } + + 
obj_surface = vme_context->used_reference_objects[1]; + frame_index = -1; + for (i = 0; i < 16; i++) { + if (obj_surface && + obj_surface == encode_state->reference_objects[i]) { + frame_index = i; + break; + } + } + if (frame_index == -1) { + WARN_ONCE("RefPicList1 is not found in DPB!\n"); + } else { + int ref_idx_l1_shift = ref_idx_l1 * 8; + bref_entry &= ~(0xFF << ref_idx_l1_shift); + bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift); + } + } + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); + OUT_BCS_BATCH(batch, 0); //Select L0 + OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference + for(i = 0; i < 7; i++) { + OUT_BCS_BATCH(batch, 0x80808080); + } + ADVANCE_BCS_BATCH(batch); + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); + OUT_BCS_BATCH(batch, 1); //Select L1 + OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference + for(i = 0; i < 7; i++) { + OUT_BCS_BATCH(batch, 0x80808080); + } + ADVANCE_BCS_BATCH(batch); +} + + +void intel_vme_mpeg2_state_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message); + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + uint32_t mv_x, mv_y; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; + VAEncPictureParameterBufferMPEG2 *pic_param = NULL; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; + + if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) { + mv_x = 512; + mv_y = 64; + } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) { + mv_x = 1024; + mv_y = 
128; + } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) { + mv_x = 2048; + mv_y = 128; + } else { + WARN_ONCE("Incorrect Mpeg2 level setting!\n"); + mv_x = 512; + mv_y = 64; + } + + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + if (pic_param->picture_type != VAEncPictureTypeIntra) { + int qp, m_cost, j, mv_count; + float lambda, m_costf; + slice_param = (VAEncSliceParameterBufferMPEG2 *) + encode_state->slice_params_ext[0]->buffer; + qp = slice_param->quantiser_scale_code; + lambda = intel_lambda_qp(qp); + /* No Intra prediction. So it is zero */ + vme_state_message[MODE_INTRA_8X8] = 0; + vme_state_message[MODE_INTRA_4X4] = 0; + vme_state_message[MODE_INTER_MV0] = 0; + for (j = 1; j < 3; j++) { + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); + } + mv_count = 3; + for (j = 4; j <= 64; j *= 2) { + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; + m_cost = (int)m_costf; + vme_state_message[MODE_INTER_MV0 + mv_count] = + intel_format_lutvalue(m_cost, 0x6f); + mv_count++; + } + m_cost = lambda; + /* It can only perform the 16x16 search. So mode cost can be ignored for + * the other mode. 
for example: 16x8/8x8 + */ + vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f); + vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); + + vme_state_message[MODE_INTER_16X8] = 0; + vme_state_message[MODE_INTER_8X8] = 0; + vme_state_message[MODE_INTER_8X4] = 0; + vme_state_message[MODE_INTER_4X4] = 0; + vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); + + } + vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x); + + vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) | + width_in_mbs; +} + +void +gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + unsigned int *command_ptr; + +#define MPEG2_SCOREBOARD (1 << 21) + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = vme_context->vme_batchbuffer.bo->virtual; + + { + unsigned int mb_intra_ub, score_dep; + int x_outer, y_outer, x_inner, y_inner; + int xtemp_outer = 0; + int first_mb = 0; + int num_mb = mb_width * mb_height; + + x_outer = 0; + y_outer = 0; + + + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + x_inner = x_outer; + y_inner = y_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; } - if (frame_index == -1) { - WARN_ONCE("RefPicList0 is not found in DPB!\n"); - } else if (slice_obj_surface && slice_obj_surface->bo) { - /* This is passed by Slice_param->RefPicList0 */ - fref_entry &= ~(0xFF); - fref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList0[0], frame_index); - } else { - /* This is passed by the hacked mode */ - fref_entry &= ~(0xFF); - 
fref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index); + if (y_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = MPEG2_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); + x_inner -= 2; + y_inner += 1; + } + x_outer += 1; } - if (slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[1]; - } - frame_index = -1; - for (i = 0; i < 16; i++) { - if (obj_surface == encode_state->reference_objects[i]) { - frame_index = i; - break; - } + xtemp_outer = mb_width - 2; + if (xtemp_outer < 0) + xtemp_outer = 0; + x_outer = xtemp_outer; + y_outer = 0; + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + y_inner = y_outer; + x_inner = x_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; } - if (frame_index == -1) { - WARN_ONCE("RefPicList1 is not found in DPB!\n"); - } else if (slice_obj_surface && slice_obj_surface->bo) { - bref_entry &= 
~(0xFF); - bref_entry += intel_get_ref_idx_state_1(&slice_param->RefPicList1[0], frame_index); - } else { - bref_entry &= ~(0xFF); - bref_entry += intel_get_ref_idx_state_1(&pic_param->ReferenceFrames[frame_index], frame_index); + if (y_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } } - } - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); - OUT_BCS_BATCH(batch, 0); //Select L0 - OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference - for(i = 0; i < 7; i++) { - OUT_BCS_BATCH(batch, 0x80808080); - } - ADVANCE_BCS_BATCH(batch); - - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); - OUT_BCS_BATCH(batch, 1); //Select L1 - OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference - for(i = 0; i < 7; i++) { - OUT_BCS_BATCH(batch, 0x80808080); + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = MPEG2_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); + + x_inner -= 2; + y_inner += 1; + } + x_outer++; + if (x_outer >= mb_width) { + y_outer += 1; + x_outer = xtemp_outer; + } } - ADVANCE_BCS_BATCH(batch); + } + + *command_ptr++ = 0; + *command_ptr++ = MI_BATCH_BUFFER_END; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); + return; +} + +static int +avc_temporal_find_surface(VAPictureH264 *curr_pic, + VAPictureH264 *ref_list, + int num_pictures, + int dir) +{ + int i, found = -1, min = 0x7FFFFFFF; + + for (i = 0; i < num_pictures; i++) { + int tmp; + + if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) || 
+ (ref_list[i].picture_id == VA_INVALID_SURFACE)) + break; + + tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt; + + if (dir) + tmp = -tmp; + + if (tmp > 0 && tmp < min) { + min = tmp; + found = i; + } + } + + return found; +} + +void +intel_avc_vme_reference_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int list_index, + int surface_index, + void (* vme_source_surface_state)( + VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context)) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct object_surface *obj_surface = NULL; + struct i965_driver_data *i965 = i965_driver_data(ctx); + VASurfaceID ref_surface_id; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int max_num_references; + VAPictureH264 *curr_pic; + VAPictureH264 *ref_list; + int ref_idx; + + if (list_index == 0) { + max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1; + ref_list = slice_param->RefPicList0; + } else { + max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1; + ref_list = slice_param->RefPicList1; + } + + if (max_num_references == 1) { + if (list_index == 0) { + ref_surface_id = slice_param->RefPicList0[0].picture_id; + vme_context->used_references[0] = &slice_param->RefPicList0[0]; + } else { + ref_surface_id = slice_param->RefPicList1[0].picture_id; + vme_context->used_references[1] = &slice_param->RefPicList1[0]; + } + + if (ref_surface_id != VA_INVALID_SURFACE) + obj_surface = SURFACE(ref_surface_id); + + if (!obj_surface || + !obj_surface->bo) { + obj_surface = encode_state->reference_objects[list_index]; + vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index]; + } + 
+ ref_idx = 0; + } else { + curr_pic = &pic_param->CurrPic; + + /* select the reference frame in temporal space */ + ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1); + ref_surface_id = ref_list[ref_idx].picture_id; + + if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */ + obj_surface = SURFACE(ref_surface_id); + + vme_context->used_reference_objects[list_index] = obj_surface; + vme_context->used_references[list_index] = &ref_list[ref_idx]; + } + + if (obj_surface && + obj_surface->bo) { + assert(ref_idx >= 0); + vme_context->used_reference_objects[list_index] = obj_surface; + vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context); + vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 | + ref_idx << 16 | + ref_idx << 8 | + ref_idx); + } else { + vme_context->used_reference_objects[list_index] = NULL; + vme_context->used_references[list_index] = NULL; + vme_context->ref_index_in_mb[list_index] = 0; + } +} + +void intel_avc_slice_insert_packed_data(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + struct intel_batchbuffer *slice_batch) +{ + int count, i, start_index; + unsigned int length_in_bits; + VAEncPackedHeaderParameterBuffer *param = NULL; + unsigned int *header_data = NULL; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + int slice_header_index; + + if (encode_state->slice_header_index[slice_index] == 0) + slice_header_index = -1; + else + slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK); + + count = encode_state->slice_rawdata_count[slice_index]; + start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK); + + for (i = 0; i < count; i++) { + unsigned int skip_emul_byte_cnt; + + header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer; + + param = 
(VAEncPackedHeaderParameterBuffer *) + (encode_state->packed_header_params_ext[start_index + i]->buffer); + + /* skip the slice header packed data type as it is lastly inserted */ + if (param->type == VAEncPackedHeaderSlice) + continue; + + length_in_bits = param->bit_length; + + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); + + /* as the slice header is still required, the last header flag is set to + * zero. + */ + mfc_context->insert_object(ctx, + encoder_context, + header_data, + ALIGN(length_in_bits, 32) >> 5, + length_in_bits & 0x1f, + skip_emul_byte_cnt, + 0, + 0, + !param->has_emulation_bytes, + slice_batch); + } + + if (slice_header_index == -1) { + unsigned char *slice_header = NULL; + int slice_header_length_in_bits = 0; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; + + /* No slice header data is passed. 
And the driver needs to generate it */ + /* For the Normal H264 */ + slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, + pPicParameter, + pSliceParameter, + &slice_header); + mfc_context->insert_object(ctx, encoder_context, + (unsigned int *)slice_header, + ALIGN(slice_header_length_in_bits, 32) >> 5, + slice_header_length_in_bits & 0x1f, + 5, /* first 5 bytes are start code + nal unit type */ + 1, 0, 1, slice_batch); + + free(slice_header); + } else { + unsigned int skip_emul_byte_cnt; + + header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer; + + param = (VAEncPackedHeaderParameterBuffer *) + (encode_state->packed_header_params_ext[slice_header_index]->buffer); + length_in_bits = param->bit_length; + + /* as the slice header is the last header data for one slice, + * the last header flag is set to one. + */ + skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); + + mfc_context->insert_object(ctx, + encoder_context, + header_data, + ALIGN(length_in_bits, 32) >> 5, + length_in_bits & 0x1f, + skip_emul_byte_cnt, + 1, + 0, + !param->has_emulation_bytes, + slice_batch); + } + + return; } diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 3f696dd..b6d19e8 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -61,6 +61,7 @@ gen6_mfd_init_avc_surface(VADriverContextP ctx, if (!gen6_avc_surface) { gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1); + gen6_avc_surface->frame_store_id = -1; assert((obj_surface->size & 0x3f) == 0); obj_surface->private_data = gen6_avc_surface; } @@ -130,7 +131,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; struct object_surface *obj_surface = decode_state->render_object; - + unsigned int surface_format; + + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? 
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -138,7 +143,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 19) | ((obj_surface->orig_width - 1) << 6)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ (0 << 22) | /* surface object control state, FIXME??? */ ((obj_surface->width - 1) << 3) | /* pitch */ @@ -416,7 +421,7 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx, struct object_surface *obj_surface; GenAvcSurface *gen6_avc_surface; VAPictureH264 *va_pic; - int i, j; + int i; BEGIN_BCS_BATCH(batch, 69); OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2)); @@ -468,26 +473,14 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) { - if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { - int found = 0; + obj_surface = gen6_mfd_context->reference_surface[i].obj_surface; - assert(gen6_mfd_context->reference_surface[i].obj_surface != NULL); - - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) { - found = 1; - break; - } - } + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - assert(found == 1); - assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); - + assert(va_pic != NULL); OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { @@ -603,32 +596,6 @@ 
gen6_mfd_avc_slice_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } -static void -gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, - VAPictureParameterBufferH264 *pic_param, - struct gen6_mfd_context *gen6_mfd_context) -{ - struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; - int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; - int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ - - BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */ - OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, - height_in_mbs << 24 | - width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag)); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); -} - static inline void gen6_mfd_avc_ref_idx_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, @@ -730,29 +697,20 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx, } static void -gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx, - VAPictureParameterBufferH264 *pic_param, - struct gen6_mfd_context *gen6_mfd_context) +gen6_mfd_avc_phantom_slice_first(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen6_mfd_context *gen6_mfd_context) { - struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; - - BEGIN_BCS_BATCH(batch, 6); - OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); + gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen6_mfd_context->base.batch); } static void -gen6_mfd_avc_phantom_slice(VADriverContextP ctx, - 
VAPictureParameterBufferH264 *pic_param, - struct gen6_mfd_context *gen6_mfd_context) +gen6_mfd_avc_phantom_slice_last(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + struct gen6_mfd_context *gen6_mfd_context) { - gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context); - gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context); + gen6_mfd_avc_phantom_slice(ctx, pic_param, NULL, gen6_mfd_context->base.batch); } static void @@ -791,25 +749,18 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen6_mfd_context->reference_surface); + intel_update_avc_frame_store_index(ctx, decode_state, pic_param, + gen6_mfd_context->reference_surface, &gen6_mfd_context->fs_ctx); width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff); /* Current decoded picture */ obj_surface = decode_state->render_object; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo); @@ -896,6 +847,10 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx, else next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + if (j == 0 && + slice_param->first_mb_in_slice) + gen6_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen6_mfd_context); + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); assert((slice_param->slice_type == SLICE_TYPE_I) || @@ -918,7 +873,7 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx, } } - gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context); + gen6_mfd_avc_phantom_slice_last(ctx, pic_param, gen6_mfd_context); intel_batchbuffer_end_atomic(batch); intel_batchbuffer_flush(batch); } @@ -947,7 +902,7 @@ gen6_mfd_mpeg2_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); 
dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo); gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo; @@ -1121,9 +1076,9 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; VAPictureParameterBufferMPEG2 *pic_param; - VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param; + VASliceParameterBufferMPEG2 *slice_param, *next_slice_param; dri_bo *slice_data_bo; - int i, j; + int group_idx = 0, pre_group_idx = -1, element_idx = 0; assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; @@ -1142,28 +1097,18 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx, gen6_mfd_context->wa_mpeg2_slice_vertical_position = mpeg2_wa_slice_vertical_position(decode_state, pic_param); - for (j = 0; j < decode_state->num_slice_params; j++) { - assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); - slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer; - slice_data_bo = decode_state->slice_datas[j]->bo; - gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context); + slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[group_idx]->buffer; - if (j == decode_state->num_slice_params - 1) - next_slice_group_param = NULL; - else - next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer; - - for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { - assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); - - if (i < decode_state->slice_params[j]->num_elements - 1) - next_slice_param = slice_param + 1; - else - next_slice_param = next_slice_group_param; - - gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context); - slice_param++; + for (; slice_param;) { + if (pre_group_idx != group_idx) { + 
slice_data_bo = decode_state->slice_datas[group_idx]->bo; + gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context); + pre_group_idx = group_idx; } + + next_slice_param = intel_mpeg2_find_next_slice(decode_state, pic_param, slice_param, &group_idx, &element_idx); + gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context); + slice_param = next_slice_param; } intel_batchbuffer_end_atomic(batch); @@ -1270,7 +1215,7 @@ gen6_mfd_vc1_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo); @@ -1837,9 +1782,10 @@ gen6_mfd_decode_picture(VADriverContextP ctx, gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context); break; diff --git a/src/gen6_mfd.h b/src/gen6_mfd.h index de131d6..f499803 100644 --- a/src/gen6_mfd.h +++ b/src/gen6_mfd.h @@ -62,6 +62,7 @@ struct gen6_mfd_context VAIQMatrixBufferMPEG2 mpeg2; } iq_matrix; + GenFrameStoreContext fs_ctx; GenFrameStore reference_surface[MAX_GEN_REFERENCE_FRAMES]; GenBuffer post_deblocking_output; GenBuffer pre_deblocking_output; diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 69c667d..2e02591 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -40,14 +40,6 @@ #include "gen6_vme.h" #include "gen6_mfc.h" -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) 
-#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) - -#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) - #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) @@ -204,7 +196,6 @@ gen6_vme_surface_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; - struct i965_driver_data *i965 = i965_driver_data(ctx); /*Setup surfaces state*/ /* current picture for encoding */ @@ -215,43 +206,14 @@ gen6_vme_surface_setup(VADriverContextP ctx, if (!is_intra) { VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; int slice_type; - struct object_surface *slice_obj_surface; - int ref_surface_id; slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI); - if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen6_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); - } - if (slice_type == SLICE_TYPE_B) { - /* reference 1 
*/ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - - obj_surface = encode_state->reference_objects[1]; - if (obj_surface && obj_surface->bo) - gen6_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); - } + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen6_vme_source_surface_state); + + if (slice_type == SLICE_TYPE_B) + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen6_vme_source_surface_state); } /* VME output */ @@ -319,7 +281,7 @@ static VAStatus gen6_vme_constant_setup(VADriverContextP ctx, if (vme_context->h264_level >= 30) { mv_num = 16; if (vme_context->h264_level >= 31) - mv_num = 8; + mv_num = 8; } dri_bo_map(vme_context->gpe_context.curbe.bo, 1); @@ -422,21 +384,38 @@ static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx, dri_bo_map(vme_context->vme_state.bo, 1); assert(vme_context->vme_state.bo->virtual); vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual; - - vme_state_message[0] = 0x01010101; - vme_state_message[1] = 0x10010101; - vme_state_message[2] = 0x0F0F0F0F; - vme_state_message[3] = 0x100F0F0F; - vme_state_message[4] = 0x01010101; - vme_state_message[5] = 0x10010101; - vme_state_message[6] = 0x0F0F0F0F; - vme_state_message[7] = 0x100F0F0F; - vme_state_message[8] = 0x01010101; - vme_state_message[9] = 0x10010101; - vme_state_message[10] = 0x0F0F0F0F; - vme_state_message[11] = 0x000F0F0F; - vme_state_message[12] = 0x00; - vme_state_message[13] = 0x00; + + if (encoder_context->quality_level != ENCODER_LOW_QUALITY) { + vme_state_message[0] = 0x01010101; + vme_state_message[1] = 0x10010101; + vme_state_message[2] = 0x0F0F0F0F; + vme_state_message[3] 
= 0x100F0F0F; + vme_state_message[4] = 0x01010101; + vme_state_message[5] = 0x10010101; + vme_state_message[6] = 0x0F0F0F0F; + vme_state_message[7] = 0x100F0F0F; + vme_state_message[8] = 0x01010101; + vme_state_message[9] = 0x10010101; + vme_state_message[10] = 0x0F0F0F0F; + vme_state_message[11] = 0x000F0F0F; + vme_state_message[12] = 0x00; + vme_state_message[13] = 0x00; + } else { + vme_state_message[0] = 0x10010101; + vme_state_message[1] = 0x100F0F0F; + vme_state_message[2] = 0x10010101; + vme_state_message[3] = 0x000F0F0F; + vme_state_message[4] = 0; + vme_state_message[5] = 0; + vme_state_message[6] = 0; + vme_state_message[7] = 0; + vme_state_message[8] = 0; + vme_state_message[9] = 0; + vme_state_message[10] = 0; + vme_state_message[11] = 0; + vme_state_message[12] = 0; + vme_state_message[13] = 0; + } vme_state_message[14] = 0x4a4a; vme_state_message[15] = 0x0; @@ -490,7 +469,7 @@ gen6_vme_fill_vme_batchbuffer(VADriverContextP ctx, number_mb_cmds = slice_mb_number - i; } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; @@ -500,6 +479,7 @@ gen6_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = (number_mb_cmds << 16 | transform_8x8_mode_flag | ((i==0) << 1)); + *command_ptr++ = encoder_context->quality_level; i += number_mb_cmds; } @@ -558,7 +538,7 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx, intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, @@ -579,7 +559,7 @@ static VAStatus gen6_vme_prepare(VADriverContextP ctx, struct gen6_vme_context *vme_context = 
encoder_context->vme_context; if (!vme_context->h264_level || - (vme_context->h264_level != pSequenceParameter->level_idc)) { + (vme_context->h264_level != pSequenceParameter->level_idc)) { vme_context->h264_level = pSequenceParameter->level_idc; } /*Setup all the memory object*/ @@ -649,9 +629,7 @@ Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e { struct gen6_vme_context *vme_context = NULL; - if (encoder_context->profile != VAProfileH264Baseline && - encoder_context->profile != VAProfileH264Main && - encoder_context->profile != VAProfileH264High) { + if (encoder_context->codec != CODEC_H264) { /* Never get here */ assert(0); return False; @@ -659,7 +637,7 @@ Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e vme_context = calloc(1, sizeof(struct gen6_vme_context)); vme_context->gpe_context.surface_state_binding_table.length = - (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6; vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); diff --git a/src/gen6_vme.h b/src/gen6_vme.h index 17f199e..d461982 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -62,31 +62,44 @@ struct gen6_vme_context void (*vme_surface2_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); void (*vme_media_rw_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); + struct i965_gpe_context *gpe_context, + 
struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); void (*vme_buffer_suface_setup)(VADriverContextP ctx, struct i965_gpe_context *gpe_context, struct i965_buffer_surface *buffer_surface, unsigned long binding_table_offset, unsigned long surface_state_offset); void (*vme_media_chroma_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); void *vme_state_message; unsigned int h264_level; unsigned int video_coding_type; unsigned int vme_kernel_sum; + unsigned int mpeg2_level; + + struct object_surface *used_reference_objects[2]; + void *used_references[2]; + unsigned int ref_index_in_mb[2]; }; +#define MPEG2_PIC_WIDTH_HEIGHT 30 +#define MPEG2_MV_RANGE 29 +#define MPEG2_LEVEL_MASK 0x0f +#define MPEG2_LEVEL_LOW 0x0a +#define MPEG2_LEVEL_MAIN 0x08 +#define MPEG2_LEVEL_HIGH 0x04 + + Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern void intel_vme_update_mbmv_cost(VADriverContextP ctx, @@ -128,13 +141,38 @@ Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e extern void gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - int transform_8x8_mode_flag, - struct intel_encoder_context *encoder_context); + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context); extern void gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context); +extern void +intel_vme_mpeg2_state_setup(VADriverContextP ctx, + struct encode_state *encode_state, 
+ struct intel_encoder_context *encoder_context); + +extern void +gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + struct intel_encoder_context *encoder_context); + +void +intel_avc_vme_reference_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int list_index, + int surface_index, + void (* vme_source_surface_state)( + VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context)); + +extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); #endif /* _GEN6_VME_H_ */ diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index cfc3c22..a6a3c1d 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -43,33 +43,31 @@ #include "gen6_vme.h" #include "intel_media.h" -#define MFC_SOFTWARE_HASWELL 1 +#define AVC_INTRA_RDO_OFFSET 4 +#define AVC_INTER_RDO_OFFSET 10 +#define AVC_INTER_MSG_OFFSET 8 +#define AVC_INTER_MV_OFFSET 48 +#define AVC_RDO_MASK 0xFFFF + +#define MFC_SOFTWARE_HASWELL 0 + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) #define B0_STEP_REV 2 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) -static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b" -}; - -static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b" +static const uint32_t gen75_mfc_batchbuffer_avc[][4] = { +#include "shaders/utils/mfc_batchbuffer_hsw.g75b" }; static struct i965_kernel gen75_mfc_kernels[] = { { "MFC AVC INTRA BATCHBUFFER ", 
MFC_BATCHBUFFER_AVC_INTRA, - gen75_mfc_batchbuffer_avc_intra, - sizeof(gen75_mfc_batchbuffer_avc_intra), - NULL - }, - - { - "MFC AVC INTER BATCHBUFFER ", - MFC_BATCHBUFFER_AVC_INTER, - gen75_mfc_batchbuffer_avc_inter, - sizeof(gen75_mfc_batchbuffer_avc_inter), + gen75_mfc_batchbuffer_avc, + sizeof(gen75_mfc_batchbuffer_avc), NULL }, }; @@ -86,8 +84,8 @@ static struct i965_kernel gen75_mfc_kernels[] = { static void gen75_mfc_pipe_mode_select(VADriverContextP ctx, - int standard_select, - struct intel_encoder_context *encoder_context) + int standard_select, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -152,7 +150,7 @@ gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *enco static void gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -161,11 +159,11 @@ gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 26); OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); - /* the DW1-3 is for the MFX indirect bistream offset */ + /* the DW1-3 is for the MFX indirect bistream offset */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW4-5 is the MFX upper bound */ + /* the DW4-5 is the MFX upper bound */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -176,14 +174,14 @@ gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ OUT_BCS_BATCH(batch, 0); - /* the DW11-15 is for MFX IT-COFF. Not used on encoder */ + /* the DW11-15 is for MFX IT-COFF. 
Not used on encoder */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */ + /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -247,7 +245,7 @@ gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_con static void gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -261,7 +259,7 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); /*DW1. MB setting of frame */ OUT_BCS_BATCH(batch, - ((width_in_mbs * height_in_mbs) & 0xFFFF)); + ((width_in_mbs * height_in_mbs - 1) & 0xFFFF)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); @@ -291,22 +289,22 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, (1 << 2) | /* Frame MB only flag */ (0 << 1) | /* MBAFF mode is in active */ (0 << 0)); /* Field picture flag */ - /* DW5 Trellis quantization */ + /* DW5 Trellis quantization */ OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ (0xBB8 << 16) | /* InterMbMaxSz */ (0xEE8) ); /* IntraMbMaxSz */ OUT_BCS_BATCH(batch, 0); /* Reserved */ - /* DW8. QP delta */ + /* DW8. QP delta */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ - /* DW10. Bit setting for MB */ + /* DW10. 
Bit setting for MB */ OUT_BCS_BATCH(batch, 0x8C000000); OUT_BCS_BATCH(batch, 0x00010000); - /* DW12. */ + /* DW12. */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0x02010100); - /* DW14. For short format */ + /* DW14. For short format */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -315,10 +313,10 @@ gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, static void gen75_mfc_qm_state(VADriverContextP ctx, - int qm_type, - unsigned int *qm, - int qm_length, - struct intel_encoder_context *encoder_context) + int qm_type, + unsigned int *qm, + int qm_length, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; unsigned int qm_buffer[16]; @@ -352,10 +350,10 @@ gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encod static void gen75_mfc_fqm_state(VADriverContextP ctx, - int fqm_type, - unsigned int *fqm, - int fqm_length, - struct intel_encoder_context *encoder_context) + int fqm_type, + unsigned int *fqm, + int fqm_length, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; unsigned int fqm_buffer[32]; @@ -393,9 +391,9 @@ gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *enco static void gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context, - unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, - int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, - struct intel_batchbuffer *batch) + unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, + int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, + struct intel_batchbuffer *batch) { if (batch == NULL) batch = encoder_context->base.batch; @@ -418,16 +416,33 @@ gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context * static void 
gen75_mfc_init(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; dri_bo *bo; int i; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int width_in_mbs = 0; + int height_in_mbs = 0; + int slice_batchbuffer_size; + + if (encoder_context->codec == CODEC_H264 || + encoder_context->codec == CODEC_H264_MVC) { + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + width_in_mbs = pSequenceParameter->picture_width_in_mbs; + height_in_mbs = pSequenceParameter->picture_height_in_mbs; + } else { + VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + + assert(encoder_context->codec == CODEC_MPEG2); + + width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16; + height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; + } + + slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + + (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext; /*Encode common setup for MFC*/ dri_bo_unreference(mfc_context->post_deblocking_output.bo); @@ -495,7 +510,8 @@ static void gen75_mfc_init(VADriverContextP ctx, if (mfc_context->aux_batchbuffer) intel_batchbuffer_free(mfc_context->aux_batchbuffer); - mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0); + mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, + slice_batchbuffer_size); 
mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); mfc_context->aux_batchbuffer_surface.pitch = 16; @@ -507,7 +523,7 @@ static void gen75_mfc_init(VADriverContextP ctx, static void gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -525,9 +541,9 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, else OUT_BCS_BATCH(batch, 0); /* pre output addr */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - /* the DW4-6 is for the post_deblocking */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* the DW4-6 is for the post_deblocking */ if (mfc_context->post_deblocking_output.bo) OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo, @@ -535,37 +551,37 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, 0); /* post output addr */ else OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW7-9 is for the uncompressed_picture */ + /* the DW7-9 is for the uncompressed_picture */ OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* uncompressed data */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW10-12 is for the mb status */ + /* the DW10-12 is for the mb status */ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* StreamOut data*/ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW13-15 is for the intra_row_store_scratch */ + /* the 
DW13-15 is for the intra_row_store_scratch */ OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW16-18 is for the deblocking filter */ + /* the DW16-18 is for the deblocking filter */ OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* the DW 19-50 is for Reference pictures*/ for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { @@ -578,25 +594,25 @@ gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx, } OUT_BCS_BATCH(batch, 0); } - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* The DW 52-54 is for the MB status buffer */ + /* The DW 52-54 is for the MB status buffer */ OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* Macroblock status buffer*/ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW 55-57 is the ILDB buffer */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + /* the DW 55-57 is the ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW 58-60 is the second ILDB buffer */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + /* the DW 58-60 is the second ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); } @@ -657,14 +673,14 @@ gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* Macroblock status buffer*/ - 
OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); } static void gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -688,15 +704,15 @@ gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); } } - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); - /* the DW34-36 is the MV for the current reference */ - OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + /* the DW34-36 is the MV for the current reference */ + OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* POL list */ for(i = 0; i < 32; i++) { @@ -749,7 +765,7 @@ gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_contex static void gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -763,12 +779,12 @@ gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ + /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); - /* the DW7-9 is for Bitplane Read Buffer Base Address */ + /* the DW7-9 is for Bitplane Read Buffer Base Address */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ 
-802,8 +818,8 @@ gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_con static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -821,8 +837,8 @@ static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, static VAStatus gen75_mfc_run(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; @@ -834,9 +850,9 @@ static VAStatus gen75_mfc_run(VADriverContextP ctx, static VAStatus gen75_mfc_stop(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int *encoded_bits_size) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int *encoded_bits_size) { VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; @@ -853,13 +869,13 @@ gen75_mfc_stop(VADriverContextP ctx, static void gen75_mfc_avc_slice_state(VADriverContextP ctx, - VAEncPictureParameterBufferH264 *pic_param, - VAEncSliceParameterBufferH264 *slice_param, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int rate_control_enable, - int qp, - struct intel_batchbuffer *batch) + VAEncPictureParameterBufferH264 *pic_param, + VAEncSliceParameterBufferH264 *slice_param, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int rate_control_enable, + int qp, + struct intel_batchbuffer *batch) { struct gen6_mfc_context *mfc_context = 
encoder_context->mfc_context; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; @@ -875,19 +891,32 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, int maxQpN, maxQpP; unsigned char correct[6], grow, shrink; int i; - int bslice = 0; int weighted_pred_idc = 0; unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom; unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom; + int num_ref_l0 = 0, num_ref_l1 = 0; if (batch == NULL) batch = encoder_context->base.batch; - if (slice_type == SLICE_TYPE_P) { + if (slice_type == SLICE_TYPE_I) { + luma_log2_weight_denom = 0; + chroma_log2_weight_denom = 0; + } else if (slice_type == SLICE_TYPE_P) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; } else if (slice_type == SLICE_TYPE_B) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; - bslice = 1; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) { + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + } if (weighted_pred_idc == 2) { /* 8.4.3 - Derivation process for prediction weights (8-279) */ @@ -912,14 +941,11 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) ); OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/ - if (slice_type == SLICE_TYPE_I) { - OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/ - } else { - OUT_BCS_BATCH(batch, - (1 << 16) | (bslice << 24) | /*1 reference frame*/ - (chroma_log2_weight_denom << 8) | - (luma_log2_weight_denom << 0)); - } + OUT_BCS_BATCH(batch, + (num_ref_l0 << 16) | + (num_ref_l1 << 24) | + 
(chroma_log2_weight_denom << 8) | + (luma_log2_weight_denom << 0)); OUT_BCS_BATCH(batch, (weighted_pred_idc << 30) | @@ -969,14 +995,14 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, } -#ifdef MFC_SOFTWARE_HASWELL +#if MFC_SOFTWARE_HASWELL static int gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, - int qp,unsigned int *msg, - struct intel_encoder_context *encoder_context, - unsigned char target_mb_size, unsigned char max_mb_size, - struct intel_batchbuffer *batch) + int qp,unsigned int *msg, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size, unsigned char max_mb_size, + struct intel_batchbuffer *batch) { int len_in_dwords = 12; unsigned int intra_msg; @@ -1023,13 +1049,14 @@ gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, static int gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, - unsigned int *msg, unsigned int offset, - struct intel_encoder_context *encoder_context, - unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, - struct intel_batchbuffer *batch) + unsigned int *msg, unsigned int offset, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, + struct intel_batchbuffer *batch) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; int len_in_dwords = 12; - unsigned int inter_msg = 0; + unsigned int inter_msg = 0; if (batch == NULL) batch = encoder_context->base.batch; { @@ -1041,30 +1068,30 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i * command. 
*/ if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) { - /* MV[0] and MV[2] are replicated */ - mv_ptr[4] = mv_ptr[0]; - mv_ptr[5] = mv_ptr[1]; - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[6] = mv_ptr[8]; - mv_ptr[7] = mv_ptr[9]; + /* MV[0] and MV[2] are replicated */ + mv_ptr[4] = mv_ptr[0]; + mv_ptr[5] = mv_ptr[1]; + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[6] = mv_ptr[8]; + mv_ptr[7] = mv_ptr[9]; } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) { - /* MV[0] and MV[1] are replicated */ - mv_ptr[2] = mv_ptr[0]; - mv_ptr[3] = mv_ptr[1]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; + /* MV[0] and MV[1] are replicated */ + mv_ptr[2] = mv_ptr[0]; + mv_ptr[3] = mv_ptr[1]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - !(msg[1] & SUBMB_SHAPE_MASK)) { - /* Don't touch MV[0] or MV[1] */ - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; + !(msg[1] & SUBMB_SHAPE_MASK)) { + /* Don't touch MV[0] or MV[1] */ + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; } } @@ -1072,21 +1099,21 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); - inter_msg = 32; - /* MV quantity */ - if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { - if (msg[1] & SUBMB_SHAPE_MASK) - inter_msg = 128; - } + inter_msg = 32; + /* MV quantity */ + if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { + if (msg[1] & SUBMB_SHAPE_MASK) + inter_msg = 128; + } OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/ OUT_BCS_BATCH(batch, offset); - inter_msg = msg[0] & (0x1F00FFFF); - inter_msg |= INTER_MV8; - inter_msg |= ((1 << 19) | (1 
<< 18) | (1 << 17)); - if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - (msg[1] & SUBMB_SHAPE_MASK)) { - inter_msg |= INTER_MV32; - } + inter_msg = msg[0] & (0x1F00FFFF); + inter_msg |= INTER_MV8; + inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17)); + if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && + (msg[1] & SUBMB_SHAPE_MASK)) { + inter_msg |= INTER_MV32; + } OUT_BCS_BATCH(batch, inter_msg); @@ -1102,11 +1129,11 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */ #endif - inter_msg = msg[1] >> 8; + inter_msg = msg[1] >> 8; /*Stuff for Inter MB*/ OUT_BCS_BATCH(batch, inter_msg); - OUT_BCS_BATCH(batch, 0x0); - OUT_BCS_BATCH(batch, 0x0); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]); /*MaxSizeInWord and TargetSzieInWord*/ OUT_BCS_BATCH(batch, (max_mb_size << 24) | @@ -1119,18 +1146,12 @@ gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, i return len_in_dwords; } -#define AVC_INTRA_RDO_OFFSET 4 -#define AVC_INTER_RDO_OFFSET 10 -#define AVC_INTER_MSG_OFFSET 8 -#define AVC_INTER_MV_OFFSET 48 -#define AVC_RDO_MASK 0xFFFF - static void gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + struct intel_batchbuffer *slice_batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -1145,15 +1166,18 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int i,x,y; int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = 
NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) { + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1162,22 +1186,16 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); assert(qp >= 0 && qp < 52); - gen75_mfc_avc_slice_state(ctx, - pPicParameter, - pSliceParameter, - encode_state, encoder_context, - (rate_control_mode == VA_RC_CBR), qp, slice_batch); + gen75_mfc_avc_slice_state(ctx, + pPicParameter, + pSliceParameter, + encode_state, encoder_context, + (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch); - if ( slice_index == 0) + if ( slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, encoder_context, - (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, 0, 1, slice_batch); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); dri_bo_map(vme_context->vme_output.bo , 1); msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; @@ -1223,27 +1241,21 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, tail_data, 1, 8, 1, 1, 1, 0, slice_batch); } - - 
free(slice_header); - } static dri_bo * gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch; dri_bo *batch_bo; int i; int buffer_size; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch); @@ -1257,7 +1269,9 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } @@ -1266,8 +1280,8 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, static void gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -1279,43 +1293,27 @@ gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, &vme_context->vme_output, BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT), SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT)); - assert(mfc_context->aux_batchbuffer_surface.bo); - mfc_context->buffer_suface_setup(ctx, - 
&mfc_context->gpe_context, - &mfc_context->aux_batchbuffer_surface, - BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER), - SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER)); } static void gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { - struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1; - mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */ - mfc_context->mfc_batchbuffer_surface.pitch = 16; - mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, - "MFC batchbuffer", - mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block, - 0x1000); + assert(mfc_context->aux_batchbuffer_surface.bo); mfc_context->buffer_suface_setup(ctx, &mfc_context->gpe_context, - &mfc_context->mfc_batchbuffer_surface, + &mfc_context->aux_batchbuffer_surface, BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER), SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER)); } static void gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context); gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context); @@ -1323,8 +1321,8 @@ 
gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, static void gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct gen6_interface_descriptor_data *desc; @@ -1366,155 +1364,140 @@ gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, static void gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; (void)mfc_context; } +#define AVC_PAK_LEN_IN_BYTE 48 +#define AVC_PAK_LEN_IN_OWORD 3 + static void gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, - int index, - int head_offset, - int batchbuffer_offset, - int head_size, - int tail_size, - int number_mb_cmds, - int first_object, - int last_object, - int last_slice, - int mb_x, - int mb_y, - int width_in_mbs, - int qp) + uint32_t intra_flag, + int head_offset, + int number_mb_cmds, + int slice_end_x, + int slice_end_y, + int mb_x, + int mb_y, + int width_in_mbs, + int qp, + uint32_t fwd_ref, + uint32_t bwd_ref) { - BEGIN_BATCH(batch, 12); + uint32_t temp_value; + BEGIN_BATCH(batch, 14); - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2)); - OUT_BATCH(batch, index); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2)); + OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); /*inline data */ - OUT_BATCH(batch, head_offset); - OUT_BATCH(batch, batchbuffer_offset); - OUT_BATCH(batch, - head_size << 16 | - tail_size); - OUT_BATCH(batch, - number_mb_cmds << 16 | - first_object << 2 | - last_object << 1 | - last_slice); - OUT_BATCH(batch, - mb_y << 8 | - mb_x); 
+ OUT_BATCH(batch, head_offset / 16); + OUT_BATCH(batch, (intra_flag) | (qp << 16)); + temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16)); + OUT_BATCH(batch, temp_value); + + OUT_BATCH(batch, number_mb_cmds); + OUT_BATCH(batch, - qp << 16 | - width_in_mbs); + ((slice_end_y << 8) | (slice_end_x))); + OUT_BATCH(batch, fwd_ref); + OUT_BATCH(batch, bwd_ref); + + OUT_BATCH(batch, MI_NOOP); ADVANCE_BATCH(batch); } static void gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - VAEncSliceParameterBufferH264 *slice_param, - int head_offset, - unsigned short head_size, - unsigned short tail_size, - int batchbuffer_offset, - int qp, - int last_slice) + struct intel_encoder_context *encoder_context, + VAEncSliceParameterBufferH264 *slice_param, + int head_offset, + int qp, + int last_slice) { struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int total_mbs = slice_param->num_macroblocks; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); int number_mb_cmds = 128; - int starting_mb = 0; - int last_object = 0; - int first_object = 1; - int i; + int starting_offset = 0; int mb_x, mb_y; - int index = (slice_param->slice_type == SLICE_TYPE_I) ? 
MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER; - - for (i = 0; i < total_mbs / number_mb_cmds; i++) { - last_object = (total_mbs - starting_mb) == number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); + int last_mb, slice_end_x, slice_end_y; + int remaining_mb = total_mbs; + uint32_t fwd_ref , bwd_ref, mb_flag; - starting_mb += number_mb_cmds; + last_mb = slice_param->macroblock_address + total_mbs - 1; + slice_end_x = last_mb % width_in_mbs; + slice_end_y = last_mb / width_in_mbs; - gen75_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); - - if (first_object) { - head_offset += head_size; - batchbuffer_offset += head_size; - } - - if (last_object) { - head_offset += tail_size; - batchbuffer_offset += tail_size; - } - - batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD; + if (slice_type == SLICE_TYPE_I) { + fwd_ref = 0; + bwd_ref = 0; + mb_flag = 1; + } else { + fwd_ref = vme_context->ref_index_in_mb[0]; + bwd_ref = vme_context->ref_index_in_mb[1]; + mb_flag = 0; + } - first_object = 0; + if (width_in_mbs >= 100) { + number_mb_cmds = width_in_mbs / 5; + } else if (width_in_mbs >= 80) { + number_mb_cmds = width_in_mbs / 4; + } else if (width_in_mbs >= 60) { + number_mb_cmds = width_in_mbs / 3; + } else if (width_in_mbs >= 40) { + number_mb_cmds = width_in_mbs / 2; + } else { + number_mb_cmds = width_in_mbs; } - if (!last_object) { - last_object = 1; - number_mb_cmds = total_mbs % number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); - starting_mb += number_mb_cmds; + do { + if (number_mb_cmds >= 
remaining_mb) { + number_mb_cmds = remaining_mb; + } + mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs; + mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs; gen75_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); - } + mb_flag, + head_offset, + number_mb_cmds, + slice_end_x, + slice_end_y, + mb_x, + mb_y, + width_in_mbs, + qp, + fwd_ref, + bwd_ref); + + head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE); + remaining_mb -= number_mb_cmds; + starting_offset += number_mb_cmds; + } while (remaining_mb > 0); } /* * return size in Owords (16bytes) */ -static int +static void gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - int batchbuffer_offset) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; @@ -1526,17 +1509,18 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned char *slice_header = NULL; - int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; long head_offset; - int old_used = intel_batchbuffer_used_size(slice_batch), used; - unsigned short head_size, tail_size; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + int qp_slice; + qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { qp = 
mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + if (encode_state->slice_header_index[slice_index] == 0) { + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } } /* only support for 8-bit pixel bit-depth */ @@ -1545,40 +1529,35 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); assert(qp >= 0 && qp < 52); - head_offset = old_used / 16; gen75_mfc_avc_slice_state(ctx, - pPicParameter, - pSliceParameter, - encode_state, - encoder_context, - (rate_control_mode == VA_RC_CBR), - qp, - slice_batch); + pPicParameter, + pSliceParameter, + encode_state, + encoder_context, + (rate_control_mode == VA_RC_CBR), + qp_slice, + slice_batch); if (slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); - slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header); - - // slice hander - mfc_context->insert_object(ctx, - encoder_context, - (unsigned int *)slice_header, - ALIGN(slice_header_length_in_bits, 32) >> 5, - slice_header_length_in_bits & 0x1f, - 5, /* first 5 bytes are start code + nal unit type */ - 1, - 0, - 1, - slice_batch); - free(slice_header); + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - head_size = (used - old_used) / 16; - old_used = used; + head_offset = intel_batchbuffer_used_size(slice_batch); + + slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE; + + gen75_mfc_avc_batchbuffer_slice_command(ctx, + encoder_context, + pSliceParameter, + head_offset, + qp, + last_slice); - /* tail */ + + /* Aligned for tail */ + intel_batchbuffer_align(slice_batch, 16); /* 
aligned by an Oword */ if (last_slice) { mfc_context->insert_object(ctx, encoder_context, @@ -1603,48 +1582,41 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx, slice_batch); } - intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - tail_size = (used - old_used) / 16; - - - gen75_mfc_avc_batchbuffer_slice_command(ctx, - encoder_context, - pSliceParameter, - head_offset, - head_size, - tail_size, - batchbuffer_offset, - qp, - last_slice); - - return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD; + return; } static void gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *batch = encoder_context->base.batch; - int i, size, offset = 0; + int i; intel_batchbuffer_start_atomic(batch, 0x4000); gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); for ( i = 0; i < encode_state->num_slice_params_ext; i++) { - size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset); - offset += size; + gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i); + } + { + struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; + intel_batchbuffer_align(slice_batch, 8); + BEGIN_BCS_BATCH(slice_batch, 2); + OUT_BCS_BATCH(slice_batch, 0); + OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(slice_batch); + mfc_context->aux_batchbuffer = NULL; + intel_batchbuffer_free(slice_batch); } - intel_batchbuffer_end_atomic(batch); intel_batchbuffer_flush(batch); } static void gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + 
struct intel_encoder_context *encoder_context) { gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context); gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context); @@ -1654,23 +1626,23 @@ gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, static dri_bo * gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context); - dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo); - return mfc_context->mfc_batchbuffer_surface.bo; + return mfc_context->aux_batchbuffer_surface.bo; } #endif static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; dri_bo *slice_batch_bo; @@ -1681,7 +1653,7 @@ gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, return; } -#ifdef MFC_SOFTWARE_HASWELL +#if MFC_SOFTWARE_HASWELL slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); #else slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); @@ -1711,8 +1683,8 @@ gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, static VAStatus gen75_mfc_avc_encode_picture(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; unsigned int rate_control_mode = encoder_context->rate_control_mode; 
@@ -1768,9 +1740,11 @@ gen75_mfc_mpeg2_pic_state(VADriverContextP ctx, VAEncPictureParameterBufferMPEG2 *pic_param; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer); pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; BEGIN_BCS_BATCH(batch, 13); OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2)); @@ -1795,7 +1769,11 @@ gen75_mfc_mpeg2_pic_state(VADriverContextP ctx, 1 << 31 | /* slice concealment */ (height_in_mbs - 1) << 16 | (width_in_mbs - 1)); - OUT_BCS_BATCH(batch, 0); + if (slice_param && slice_param->quantiser_scale_code >= 14) + OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12)); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0xFFF << 16 | /* InterMBMaxSize */ @@ -1814,7 +1792,7 @@ static void gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned char intra_qm[64] = { - 8, 16, 19, 22, 26, 27, 29, 34, + 8, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, 19, 22, 26, 27, 29, 34, 34, 38, 22, 22, 26, 27, 29, 34, 37, 40, @@ -1843,14 +1821,14 @@ static void gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned short intra_fqm[64] = { - 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, - 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, - 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, - 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, - 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 
65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, - 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, - 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, - 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, + 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, + 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, + 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, + 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, + 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, + 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, + 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, + 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, }; unsigned short non_intra_fqm[64] = { @@ -2228,20 +2206,39 @@ gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, 0xff, slice_batch); } else { - gen75_mfc_mpeg2_pak_object_inter(ctx, - encode_state, - encoder_context, - msg, - width_in_mbs, height_in_mbs, - h_pos, v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - slice_param->quantiser_scale_code, - 0, - 0xff, - slice_batch); + int inter_rdo, intra_rdo; + inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; + intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; + + if (intra_rdo < inter_rdo) + gen75_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); + else + 
gen75_mfc_mpeg2_pak_object_inter(ctx, + encode_state, + encoder_context, + msg, + width_in_mbs, height_in_mbs, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + slice_param->quantiser_scale_code, + 0, + 0xff, + slice_batch); } } @@ -2286,18 +2283,13 @@ gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { - struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *batch; - VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; dri_bo *batch_bo; int i; - int buffer_size; - int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; - int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { @@ -2318,6 +2310,7 @@ gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, dri_bo_reference(batch_bo); intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } @@ -2385,7 +2378,7 @@ intel_mfc_mpeg2_prepare(VADriverContextP ctx, /* reconstructed surface */ obj_surface = encode_state->reconstructed_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); mfc_context->pre_deblocking_output.bo = obj_surface->bo; dri_bo_reference(mfc_context->pre_deblocking_output.bo); mfc_context->surface_state.width = obj_surface->orig_width; @@ -2439,7 +2432,7 @@ 
intel_mfc_mpeg2_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODED_MPEG2; + coded_buffer_segment->codec = encoder_context->codec; dri_bo_unmap(bo); return vaStatus; @@ -2516,16 +2509,18 @@ gen75_mfc_context_destroy(void *context) } static VAStatus gen75_mfc_pipeline(VADriverContextP ctx, - VAProfile profile, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context); break; @@ -2563,7 +2558,7 @@ Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context * i965_gpe_load_kernels(ctx, &mfc_context->gpe_context, gen75_mfc_kernels, - NUM_MFC_KERNEL); + 1); mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select; mfc_context->set_surface_state = gen75_mfc_surface_state; diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 11644d6..299f2b5 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -67,6 +67,7 @@ gen75_mfd_init_avc_surface(VADriverContextP ctx, if (!gen7_avc_surface) { gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); + gen7_avc_surface->frame_store_id = -1; assert((obj_surface->size & 0x3f) == 0); obj_surface->private_data = gen7_avc_surface; } @@ -137,12 +138,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + 
surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -150,7 +155,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ @@ -566,7 +571,7 @@ gen75_mfd_avc_img_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 17); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2)); OUT_BCS_BATCH(batch, - width_in_mbs * height_in_mbs); + (width_in_mbs * height_in_mbs - 1)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); @@ -628,25 +633,13 @@ gen75_mfd_avc_qm_state(VADriverContextP ctx, } } -static void +static inline void gen75_mfd_avc_picid_state(VADriverContextP ctx, struct decode_state *decode_state, struct gen7_mfd_context *gen7_mfd_context) { - struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; - - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2)); - OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); + gen75_send_avc_picid_state(gen7_mfd_context->base.batch, + gen7_mfd_context->reference_surface); } static void @@ -660,7 +653,7 @@ gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx, struct object_surface *obj_surface; GenAvcSurface *gen7_avc_surface; VAPictureH264 *va_pic; - int i, j; 
+ int i; BEGIN_BCS_BATCH(batch, 71); OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); @@ -700,26 +693,14 @@ gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { - if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { - int found = 0; - - assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL); + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) { - found = 1; - break; - } - } - - assert(found == 1); - assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); - + assert(va_pic != NULL); OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { @@ -747,7 +728,7 @@ gen75_mfd_avc_directmode_state(VADriverContextP ctx, struct object_surface *obj_surface; GenAvcSurface *gen7_avc_surface; VAPictureH264 *va_pic; - int i, j; + int i; if (IS_STEPPING_BPLUS(i965)) { gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param, @@ -807,26 +788,14 @@ gen75_mfd_avc_directmode_state(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { - if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { - int found = 0; + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; - assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL); + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + 
ARRAY_ELEMS(pic_param->ReferenceFrames)); - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) { - found = 1; - break; - } - } - - assert(found == 1); - assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); - + assert(va_pic != NULL); OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { @@ -843,6 +812,15 @@ gen75_mfd_avc_directmode_state(VADriverContextP ctx, } static void +gen75_mfd_avc_phantom_slice_first(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch); +} + +static void gen75_mfd_avc_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, VASliceParameterBufferH264 *slice_param, @@ -1074,7 +1052,8 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface); + gen75_update_avc_frame_store_index(ctx, decode_state, pic_param, + gen7_mfd_context->reference_surface); width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ @@ -1082,20 +1061,12 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); @@ -1169,8 +1140,8 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx, gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context); - gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context); + gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); for (j = 0; j < decode_state->num_slice_params; j++) { assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); @@ -1183,6 +1154,9 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx, else next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + if (j == 0 && slice_param->first_mb_in_slice) + gen75_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); assert((slice_param->slice_type == SLICE_TYPE_I) || @@ 
-1233,7 +1207,7 @@ gen75_mfd_mpeg2_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; @@ -1569,7 +1543,7 @@ gen75_mfd_vc1_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); @@ -2171,13 +2145,13 @@ gen75_mfd_jpeg_decode_init(VADriverContextP ctx, struct object_surface *obj_surface; VAPictureParameterBufferJPEGBaseline *pic_param; int subsampling = SUBSAMPLE_YUV420; - int fourcc = VA_FOURCC('I', 'M', 'C', '3'); + int fourcc = VA_FOURCC_IMC3; pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; if (pic_param->num_components == 1) { subsampling = SUBSAMPLE_YUV400; - fourcc = VA_FOURCC('Y', '8', '0', '0'); + fourcc = VA_FOURCC_Y800; } else if (pic_param->num_components == 3) { int h1 = pic_param->components[0].h_sampling_factor; int h2 = pic_param->components[1].h_sampling_factor; @@ -2189,31 +2163,31 @@ gen75_mfd_jpeg_decode_init(VADriverContextP ctx, if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV420; - fourcc = VA_FOURCC('I', 'M', 'C', '3'); + fourcc = VA_FOURCC_IMC3; } else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h1 == 1 && h2 == 1 
&& h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV444; - fourcc = VA_FOURCC('4', '4', '4', 'P'); + fourcc = VA_FOURCC_444P; } else if (h1 == 4 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV411; - fourcc = VA_FOURCC('4', '1', '1', 'P'); + fourcc = VA_FOURCC_411P; } else if (h1 == 1 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + fourcc = VA_FOURCC_422V; } else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 2 && v3 == 2) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h2 == 2 && h2 == 2 && h3 == 2 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + fourcc = VA_FOURCC_422V; } else assert(0); } else { @@ -2466,18 +2440,6 @@ gen75_mfd_jpeg_bsd_object(VADriverContextP ctx, /* Workaround for JPEG decoding on Ivybridge */ -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); -VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - static struct { int width; int height; @@ -2520,7 +2482,7 @@ gen75_jpeg_wa_init(VADriverContextP ctx, obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen7_mfd_context->jpeg_wa_surface_object = obj_surface; if (!gen7_mfd_context->jpeg_wa_slice_data_bo) { @@ -2826,7 +2788,7 @@ gen75_jpeg_wa_avc_img_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); OUT_BCS_BATCH(batch, - width_in_mbs * height_in_mbs); + (width_in_mbs * height_in_mbs - 1)); OUT_BCS_BATCH(batch, 
((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); @@ -3191,9 +3153,11 @@ gen75_mfd_decode_picture(VADriverContextP ctx, gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); break; @@ -3285,9 +3249,11 @@ gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: gen75_mfd_avc_context_init(ctx, gen7_mfd_context); break; default: diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c index e2344aa..6978d4b 100644 --- a/src/gen75_picture_process.c +++ b/src/gen75_picture_process.c @@ -38,12 +38,6 @@ #include "i965_post_processing.h" #include "gen75_picture_process.h" -extern void -i965_proc_picture(VADriverContextP ctx, - VAProfile profile, - union codec_state *codec_state, - struct hw_context *hw_context); - extern struct hw_context * i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config); @@ -63,8 +57,8 @@ gen75_vpp_fmt_cvt(VADriverContextP ctx, proc_ctx->vpp_fmt_cvt_ctx = i965_proc_context_init(ctx, NULL); } - i965_proc_picture(ctx, profile, codec_state, - proc_ctx->vpp_fmt_cvt_ctx); + va_status = i965_proc_picture(ctx, profile, codec_state, + proc_ctx->vpp_fmt_cvt_ctx); return va_status; } @@ -75,6 +69,7 @@ gen75_vpp_vebox(VADriverContextP ctx, { VAStatus va_status = VA_STATUS_SUCCESS; VAProcPipelineParameterBuffer* pipeline_param = proc_ctx->pipeline_param; + struct i965_driver_data *i965 = i965_driver_data(ctx); /* vpp features based on VEBox fixed function */ 
if(proc_ctx->vpp_vebox_ctx == NULL) { @@ -85,7 +80,10 @@ gen75_vpp_vebox(VADriverContextP ctx, proc_ctx->vpp_vebox_ctx->surface_input_object = proc_ctx->surface_pipeline_input_object; proc_ctx->vpp_vebox_ctx->surface_output_object = proc_ctx->surface_render_output_object; - va_status = gen75_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx); + if (IS_HASWELL(i965->intel.device_info)) + va_status = gen75_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx); + else if (IS_GEN8(i965->intel.device_info)) + va_status = gen8_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx); return va_status; } @@ -97,14 +95,14 @@ gen75_vpp_gpe(VADriverContextP ctx, VAStatus va_status = VA_STATUS_SUCCESS; if(proc_ctx->vpp_gpe_ctx == NULL){ - proc_ctx->vpp_gpe_ctx = gen75_gpe_context_init(ctx); + proc_ctx->vpp_gpe_ctx = vpp_gpe_context_init(ctx); } proc_ctx->vpp_gpe_ctx->pipeline_param = proc_ctx->pipeline_param; proc_ctx->vpp_gpe_ctx->surface_pipeline_input_object = proc_ctx->surface_pipeline_input_object; proc_ctx->vpp_gpe_ctx->surface_output_object = proc_ctx->surface_render_output_object; - va_status = gen75_gpe_process_picture(ctx, proc_ctx->vpp_gpe_ctx); + va_status = vpp_gpe_process_picture(ctx, proc_ctx->vpp_gpe_ctx); return va_status; } @@ -123,29 +121,43 @@ gen75_proc_picture(VADriverContextP ctx, (VAProcPipelineParameterBuffer *)proc_st->pipeline_param->buffer; struct object_surface *obj_dst_surf = NULL; struct object_surface *obj_src_surf = NULL; + VAStatus status; + proc_ctx->pipeline_param = pipeline_param; - assert(proc_st->current_render_target != VA_INVALID_SURFACE); if (proc_st->current_render_target == VA_INVALID_SURFACE || - pipeline_param->surface == VA_INVALID_SURFACE) + pipeline_param->surface == VA_INVALID_SURFACE) { + status = VA_STATUS_ERROR_INVALID_SURFACE; goto error; + } obj_dst_surf = SURFACE(proc_st->current_render_target); - if (!obj_dst_surf) + if (!obj_dst_surf) { + status = VA_STATUS_ERROR_INVALID_SURFACE; goto error; + } obj_src_surf = 
SURFACE(proc_ctx->pipeline_param->surface); - if (!obj_src_surf) + if (!obj_src_surf) { + status = VA_STATUS_ERROR_INVALID_SURFACE; + goto error; + } + + if (!obj_src_surf->bo) { + status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */ goto error; + } - if (pipeline_param->num_filters && !pipeline_param->filters) + if (pipeline_param->num_filters && !pipeline_param->filters) { + status = VA_STATUS_ERROR_INVALID_PARAMETER; goto error; + } if (!obj_dst_surf->bo) { unsigned int is_tiled = 0; - unsigned int fourcc = VA_FOURCC('N','V','1','2'); + unsigned int fourcc = VA_FOURCC_NV12; int sampling = SUBSAMPLE_YUV420; i965_check_alloc_surface_bo(ctx, obj_dst_surf, is_tiled, fourcc, sampling); } @@ -166,23 +178,25 @@ gen75_proc_picture(VADriverContextP ctx, if (!obj_buf || !obj_buf->buffer_store || - !obj_buf->buffer_store->buffer) + !obj_buf->buffer_store->buffer) { + status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN; goto error; + } VAProcFilterParameterBuffer* filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer; - if (filter->type == VAProcFilterNoiseReduction || - filter->type == VAProcFilterDeinterlacing || + if (filter->type == VAProcFilterNoiseReduction || + filter->type == VAProcFilterDeinterlacing || + filter->type == VAProcFilterSkinToneEnhancement || filter->type == VAProcFilterColorBalance){ gen75_vpp_vebox(ctx, proc_ctx); }else if(filter->type == VAProcFilterSharpening){ - assert(obj_src_surf->fourcc == VA_FOURCC('N','V','1','2') && - obj_dst_surf->fourcc == VA_FOURCC('N','V','1','2')); - - if (obj_src_surf->fourcc != VA_FOURCC('N', 'V', '1', '2') || - obj_dst_surf->fourcc != VA_FOURCC('N', 'V', '1', '2')) + if (obj_src_surf->fourcc != VA_FOURCC_NV12 || + obj_dst_surf->fourcc != VA_FOURCC_NV12) { + status = VA_STATUS_ERROR_UNIMPLEMENTED; goto error; + } gen75_vpp_gpe(ctx, proc_ctx); } @@ -191,18 +205,19 @@ gen75_proc_picture(VADriverContextP ctx, for (i = 0; i < pipeline_param->num_filters; i++){ struct 
object_buffer * obj_buf = BUFFER(pipeline_param->filters[i]); - assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer); - if (!obj_buf || !obj_buf->buffer_store || - !obj_buf->buffer_store->buffer) + !obj_buf->buffer_store->buffer) { + status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN; goto error; + } VAProcFilterParameterBuffer* filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer; - if (filter->type != VAProcFilterNoiseReduction && - filter->type != VAProcFilterDeinterlacing && + if (filter->type != VAProcFilterNoiseReduction && + filter->type != VAProcFilterDeinterlacing && + filter->type != VAProcFilterSkinToneEnhancement && filter->type != VAProcFilterColorBalance) { printf("Do not support multiply filters outside vebox pipeline \n"); assert(0); @@ -214,7 +229,7 @@ gen75_proc_picture(VADriverContextP ctx, return VA_STATUS_SUCCESS; error: - return VA_STATUS_ERROR_INVALID_PARAMETER; + return status; } static void @@ -235,7 +250,7 @@ gen75_proc_context_destroy(void *hw_context) } if(proc_ctx->vpp_gpe_ctx){ - gen75_gpe_context_destroy(ctx,proc_ctx->vpp_gpe_ctx); + vpp_gpe_context_destroy(ctx,proc_ctx->vpp_gpe_ctx); proc_ctx->vpp_gpe_ctx = NULL; } diff --git a/src/gen75_vme.c b/src/gen75_vme.c index b796505..576e91a 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -38,14 +38,6 @@ #include "gen6_vme.h" #include "gen6_mfc.h" -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) - -#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6) - #define SURFACE_STATE_PADDED_SIZE 
MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) @@ -113,7 +105,7 @@ static const uint32_t gen75_vme_mpeg2_intra_frame[][4] = { }; static const uint32_t gen75_vme_mpeg2_inter_frame[][4] = { -#include "shaders/vme/mpeg2_inter_frame_haswell.g75b" +#include "shaders/vme/mpeg2_inter_haswell.g75b" }; static const uint32_t gen75_vme_mpeg2_batchbuffer[][4] = { @@ -264,7 +256,6 @@ gen75_vme_surface_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; - struct i965_driver_data *i965 = i965_driver_data(ctx); /*Setup surfaces state*/ /* current picture for encoding */ @@ -276,43 +267,14 @@ gen75_vme_surface_setup(VADriverContextP ctx, if (!is_intra) { VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; int slice_type; - struct object_surface *slice_obj_surface; - int ref_surface_id; slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI); - if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen75_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); - } - if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && 
ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen75_vme_source_surface_state); - obj_surface = encode_state->reference_objects[1]; - if (obj_surface && obj_surface->bo) - gen75_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); - } + if (slice_type == SLICE_TYPE_B) + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen75_vme_source_surface_state); } /* VME output */ @@ -374,17 +336,15 @@ static VAStatus gen75_vme_constant_setup(VADriverContextP ctx, vme_state_message = (unsigned int *)vme_context->vme_state_message; - if (encoder_context->profile == VAProfileH264Baseline || - encoder_context->profile == VAProfileH264Main || - encoder_context->profile == VAProfileH264High) { + if (encoder_context->codec == CODEC_H264 || + encoder_context->codec == CODEC_H264_MVC) { if (vme_context->h264_level >= 30) { mv_num = 16; if (vme_context->h264_level >= 31) mv_num = 8; } - } else if (encoder_context->profile == VAProfileMPEG2Simple || - encoder_context->profile == VAProfileMPEG2Main) { + } else if (encoder_context->codec == CODEC_MPEG2) { mv_num = 2; } @@ -501,10 +461,9 @@ static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx, vme_state_message[i] = 0; } - switch (encoder_context->profile) { - case VAProfileH264Baseline: - case VAProfileH264Main: - case VAProfileH264High: + switch (encoder_context->codec) { + case CODEC_H264: + case CODEC_H264_MVC: gen75_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message); break; @@ -575,7 +534,7 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); - *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + 
*command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); i += 1; } @@ -619,44 +578,50 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx, int kernel_shader; bool allow_hwscore = true; int s; - - for (s = 0; s < encode_state->num_slice_params_ext; s++) { - pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; - if ((pSliceParameter->macroblock_address % width_in_mbs)) { - allow_hwscore = false; - break; - } + unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY); + + if (is_low_quality) + allow_hwscore = false; + else { + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + if ((pSliceParameter->macroblock_address % width_in_mbs)) { + allow_hwscore = false; + break; + } + } } + if ((pSliceParameter->slice_type == SLICE_TYPE_I) || (pSliceParameter->slice_type == SLICE_TYPE_I)) { kernel_shader = VME_INTRA_SHADER; - } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || - (pSliceParameter->slice_type == SLICE_TYPE_SP)) { + } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || + (pSliceParameter->slice_type == SLICE_TYPE_SP)) { kernel_shader = VME_INTER_SHADER; - } else { + } else { kernel_shader = VME_BINTER_SHADER; if (!allow_hwscore) - kernel_shader = VME_INTER_SHADER; - } + kernel_shader = VME_INTER_SHADER; + } if (allow_hwscore) gen7_vme_walker_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); else gen75_vme_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - 
pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, @@ -921,21 +886,54 @@ gen75_vme_mpeg2_pipeline_programing(VADriverContextP ctx, { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct intel_batchbuffer *batch = encoder_context->base.batch; + VAEncPictureParameterBufferMPEG2 *pic_param = NULL; VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + bool allow_hwscore = true; + int s; + int kernel_shader; - gen75_vme_mpeg2_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - is_intra ? 
VME_INTRA_SHADER : VME_INTER_SHADER, - 0, - encoder_context); + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + int j; + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer; + + for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { + if (slice_param->macroblock_address % width_in_mbs) { + allow_hwscore = false; + break; + } + } + } + + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + if (pic_param->picture_type == VAEncPictureTypeIntra) { + allow_hwscore = false; + kernel_shader = VME_INTRA_SHADER; + } else { + kernel_shader = VME_INTER_SHADER; + } + + if (allow_hwscore) + gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + encoder_context); + else + gen75_vme_mpeg2_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + 0, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, @@ -953,10 +951,19 @@ gen75_vme_mpeg2_prepare(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_SUCCESS; VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + if ((!vme_context->mpeg2_level) || + (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { + 
vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK; + } + /*Setup all the memory object*/ gen75_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); gen75_vme_interface_setup(ctx, encode_state, encoder_context); gen75_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context); gen75_vme_constant_setup(ctx, encode_state, encoder_context); /*Programing media pipeline*/ @@ -1007,19 +1014,17 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * { struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context)); struct i965_kernel *vme_kernel_list = NULL; - int i965_kernel_num; + int i965_kernel_num; - switch (encoder_context->profile) { - case VAProfileH264Baseline: - case VAProfileH264Main: - case VAProfileH264High: + switch (encoder_context->codec) { + case CODEC_H264: + case CODEC_H264_MVC: vme_kernel_list = gen75_vme_kernels; encoder_context->vme_pipeline = gen75_vme_pipeline; i965_kernel_num = sizeof(gen75_vme_kernels) / sizeof(struct i965_kernel); break; - case VAProfileMPEG2Simple: - case VAProfileMPEG2Main: + case CODEC_MPEG2: vme_kernel_list = gen75_vme_mpeg2_kernels; encoder_context->vme_pipeline = gen75_vme_mpeg2_pipeline; i965_kernel_num = sizeof(gen75_vme_mpeg2_kernels) / sizeof(struct i965_kernel); @@ -1041,9 +1046,9 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context * vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH; vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; - vme_context->gpe_context.vfe_state.num_urb_entries = 16; + vme_context->gpe_context.vfe_state.num_urb_entries = 64; vme_context->gpe_context.vfe_state.gpgpu_mode = 0; - vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; + vme_context->gpe_context.vfe_state.urb_entry_size = 16; 
vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1; gen7_vme_scoreboard_init(ctx, vme_context); diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index 70f229b..042e4e6 100644 --- a/src/gen75_vpp_gpe.c +++ b/src/gen75_vpp_gpe.c @@ -33,6 +33,7 @@ #include "intel_batchbuffer.h" #include "intel_driver.h" +#include "i965_structs.h" #include "i965_defines.h" #include "i965_drv_video.h" #include "gen75_vpp_gpe.h" @@ -40,30 +41,16 @@ #define MAX_INTERFACE_DESC_GEN6 MAX_GPE_KERNELS #define MAX_MEDIA_SURFACES_GEN6 34 -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) +#define SURFACE_STATE_OFFSET_GEN7(index) (SURFACE_STATE_PADDED_SIZE_GEN7 * (index)) +#define BINDING_TABLE_OFFSET_GEN7(index) (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index)) -#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index)) -#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index)) +#define SURFACE_STATE_OFFSET_GEN8(index) (SURFACE_STATE_PADDED_SIZE_GEN8 * (index)) +#define BINDING_TABLE_OFFSET_GEN8(index) (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index)) #define CURBE_ALLOCATION_SIZE 37 #define CURBE_TOTAL_DATA_LENGTH (4 * 32) #define CURBE_URB_ENTRY_LENGTH 4 -extern VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - -extern VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); - /* Shaders information for sharpening */ static const unsigned int gen75_gpe_sharpening_h_blur[][4] = { #include "shaders/post_processing/gen75/sharpening_h_blur.g75b" @@ -98,8 
+85,43 @@ static struct i965_kernel gen75_vpp_sharpening_kernels[] = { }, }; +/* sharpening kernels for Broadwell */ +static const unsigned int gen8_gpe_sharpening_h_blur[][4] = { + #include "shaders/post_processing/gen8/sharpening_h_blur.g8b" +}; +static const unsigned int gen8_gpe_sharpening_v_blur[][4] = { + #include "shaders/post_processing/gen8/sharpening_v_blur.g8b" +}; +static const unsigned int gen8_gpe_sharpening_unmask[][4] = { + #include "shaders/post_processing/gen8/sharpening_unmask.g8b" +}; + +static struct i965_kernel gen8_vpp_sharpening_kernels[] = { + { + "vpp: sharpening(horizontal blur)", + VPP_GPE_SHARPENING, + gen8_gpe_sharpening_h_blur, + sizeof(gen8_gpe_sharpening_h_blur), + NULL + }, + { + "vpp: sharpening(vertical blur)", + VPP_GPE_SHARPENING, + gen8_gpe_sharpening_v_blur, + sizeof(gen8_gpe_sharpening_v_blur), + NULL + }, + { + "vpp: sharpening(unmask)", + VPP_GPE_SHARPENING, + gen8_gpe_sharpening_unmask, + sizeof(gen8_gpe_sharpening_unmask), + NULL + }, +}; + static VAStatus -gpe_surfaces_setup(VADriverContextP ctx, +gen75_gpe_process_surfaces_setup(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { struct object_surface *obj_surface; @@ -111,44 +133,44 @@ gpe_surfaces_setup(VADriverContextP ctx, for( i = 0; i < input_surface_sum; i += 2){ obj_surface = vpp_gpe_ctx->surface_input_object[i/2]; assert(obj_surface); - vpp_gpe_ctx->vpp_media_rw_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - obj_surface, - BINDING_TABLE_OFFSET(i), - SURFACE_STATE_OFFSET(i)); - - vpp_gpe_ctx->vpp_media_chroma_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - obj_surface, - BINDING_TABLE_OFFSET(i + 1), - SURFACE_STATE_OFFSET(i + 1)); + gen7_gpe_media_rw_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN7(i), + SURFACE_STATE_OFFSET_GEN7(i)); + + gen75_gpe_media_chroma_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN7(i + 1), + SURFACE_STATE_OFFSET_GEN7(i + 1)); } /* Binding output NV12 
surface(Luma + Chroma) */ obj_surface = vpp_gpe_ctx->surface_output_object; assert(obj_surface); - vpp_gpe_ctx->vpp_media_rw_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - obj_surface, - BINDING_TABLE_OFFSET(input_surface_sum), - SURFACE_STATE_OFFSET(input_surface_sum)); - vpp_gpe_ctx->vpp_media_chroma_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - obj_surface, - BINDING_TABLE_OFFSET(input_surface_sum + 1), - SURFACE_STATE_OFFSET(input_surface_sum + 1)); + gen7_gpe_media_rw_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN7(input_surface_sum), + SURFACE_STATE_OFFSET_GEN7(input_surface_sum)); + gen75_gpe_media_chroma_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1), + SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1)); /* Bind kernel return buffer surface */ - vpp_gpe_ctx->vpp_buffer_surface_setup(ctx, - &vpp_gpe_ctx->gpe_ctx, - &vpp_gpe_ctx->vpp_kernel_return, - BINDING_TABLE_OFFSET((input_surface_sum + 2)), - SURFACE_STATE_OFFSET(input_surface_sum + 2)); + gen7_gpe_buffer_suface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + &vpp_gpe_ctx->vpp_kernel_return, + BINDING_TABLE_OFFSET_GEN7((input_surface_sum + 2)), + SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2)); return VA_STATUS_SUCCESS; } static VAStatus -gpe_interface_setup(VADriverContextP ctx, +gen75_gpe_process_interface_setup(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { struct gen6_interface_descriptor_data *desc; @@ -168,7 +190,7 @@ gpe_interface_setup(VADriverContextP ctx, desc->desc2.sampler_count = 0; /* FIXME: */ desc->desc2.sampler_state_pointer = 0; desc->desc3.binding_table_entry_count = 6; /* FIXME: */ - desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); + desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5); desc->desc4.constant_urb_entry_read_offset = 0; desc->desc4.constant_urb_entry_read_length = 0; @@ -186,23 +208,21 @@ gpe_interface_setup(VADriverContextP 
ctx, } static VAStatus -gpe_constant_setup(VADriverContextP ctx, - struct vpp_gpe_context *vpp_gpe_ctx){ +gen75_gpe_process_constant_fill(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1); assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual); - /*Copy buffer into CURB*/ - /* unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual; memcpy(constant_buffer, vpp_gpe_ctx->kernel_param, vpp_gpe_ctx->kernel_param_size); - */ dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo); return VA_STATUS_SUCCESS; } static VAStatus -gpe_fill_thread_parameters(VADriverContextP ctx, +gen75_gpe_process_parameters_fill(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { unsigned int *command_ptr; @@ -237,7 +257,7 @@ gpe_fill_thread_parameters(VADriverContextP ctx, } static VAStatus -gpe_pipeline_setup(VADriverContextP ctx, +gen75_gpe_process_pipeline_setup(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000); @@ -245,10 +265,10 @@ gpe_pipeline_setup(VADriverContextP ctx, gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch); - gpe_fill_thread_parameters(ctx, vpp_gpe_ctx); + gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx); BEGIN_BATCH(vpp_gpe_ctx->batch, 2); - OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(vpp_gpe_ctx->batch, vpp_gpe_ctx->vpp_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, @@ -261,7 +281,7 @@ gpe_pipeline_setup(VADriverContextP ctx, } static VAStatus -gpe_process_init(VADriverContextP ctx, +gen75_gpe_process_init(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -290,28 +310,28 @@ gpe_process_init(VADriverContextP ctx, vpp_gpe_ctx->vpp_kernel_return.bo = bo; dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo); - i965_gpe_context_init(ctx, 
&vpp_gpe_ctx->gpe_ctx); + vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx); return VA_STATUS_SUCCESS; } static VAStatus -gpe_process_prepare(VADriverContextP ctx, +gen75_gpe_process_prepare(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { /*Setup all the memory object*/ - gpe_surfaces_setup(ctx, vpp_gpe_ctx); - gpe_interface_setup(ctx, vpp_gpe_ctx); - gpe_constant_setup(ctx, vpp_gpe_ctx); + gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx); + gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx); + //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx); /*Programing media pipeline*/ - gpe_pipeline_setup(ctx, vpp_gpe_ctx); + gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx); return VA_STATUS_SUCCESS; } static VAStatus -gpe_process_run(VADriverContextP ctx, +gen75_gpe_process_run(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { intel_batchbuffer_flush(vpp_gpe_ctx->batch); @@ -320,19 +340,285 @@ gpe_process_run(VADriverContextP ctx, } static VAStatus -gen75_gpe_process(VADriverContextP ctx, +gen75_gpe_process(VADriverContextP ctx, struct vpp_gpe_context * vpp_gpe_ctx) { VAStatus va_status = VA_STATUS_SUCCESS; - va_status = gpe_process_init(ctx, vpp_gpe_ctx); - va_status |=gpe_process_prepare(ctx, vpp_gpe_ctx); - va_status |=gpe_process_run(ctx, vpp_gpe_ctx); - - return va_status; + + va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + return VA_STATUS_SUCCESS; } static VAStatus -gen75_gpe_process_sharpening(VADriverContextP ctx, +gen8_gpe_process_surfaces_setup(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + struct object_surface *obj_surface; + unsigned int i = 0; + unsigned char input_surface_sum = (1 + 
vpp_gpe_ctx->forward_surf_sum + + vpp_gpe_ctx->backward_surf_sum) * 2; + + /* Binding input NV12 surfaces (Luma + Chroma)*/ + for( i = 0; i < input_surface_sum; i += 2){ + obj_surface = vpp_gpe_ctx->surface_input_object[i/2]; + assert(obj_surface); + gen8_gpe_media_rw_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN8(i), + SURFACE_STATE_OFFSET_GEN8(i)); + + gen8_gpe_media_chroma_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN8(i + 1), + SURFACE_STATE_OFFSET_GEN8(i + 1)); + } + + /* Binding output NV12 surface(Luma + Chroma) */ + obj_surface = vpp_gpe_ctx->surface_output_object; + assert(obj_surface); + gen8_gpe_media_rw_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN8(input_surface_sum), + SURFACE_STATE_OFFSET_GEN8(input_surface_sum)); + gen8_gpe_media_chroma_surface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + obj_surface, + BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1), + SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1)); + /* Bind kernel return buffer surface */ + gen7_gpe_buffer_suface_setup(ctx, + &vpp_gpe_ctx->gpe_ctx, + &vpp_gpe_ctx->vpp_kernel_return, + BINDING_TABLE_OFFSET_GEN8((input_surface_sum + 2)), + SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2)); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_interface_setup(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + struct gen8_interface_descriptor_data *desc; + dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo; + int i; + + dri_bo_map(bo, 1); + assert(bo->virtual); + desc = (struct gen8_interface_descriptor_data *)(bo->virtual + + vpp_gpe_ctx->gpe_ctx.idrt_offset); + + /*Setup the descritor table*/ + for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){ + struct i965_kernel *kernel; + kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i]; + assert(sizeof(*desc) == 32); + /*Setup the descritor table*/ + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = 
kernel->kernel_offset >> 6; + desc->desc3.sampler_count = 0; /* FIXME: */ + desc->desc3.sampler_state_pointer = 0; + desc->desc4.binding_table_entry_count = 6; /* FIXME: */ + desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5); + desc->desc5.constant_urb_entry_read_offset = 0; + desc->desc5.constant_urb_entry_read_length = 0; + + desc++; + } + + dri_bo_unmap(bo); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_constant_fill(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1); + assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual); + unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual; + memcpy(constant_buffer, vpp_gpe_ctx->kernel_param, + vpp_gpe_ctx->kernel_param_size); + dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_parameters_fill(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + unsigned int *command_ptr; + unsigned int i, size = vpp_gpe_ctx->thread_param_size; + unsigned char* position = NULL; + + /* Thread inline data setting*/ + dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1); + command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual; + + for(i = 0; i < vpp_gpe_ctx->thread_num; i ++) + { + *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2)); + *command_ptr++ = vpp_gpe_ctx->sub_shader_index; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + + /* copy thread inline data */ + position =(unsigned char*)(vpp_gpe_ctx->thread_param + size * i); + memcpy(command_ptr, position, size); + command_ptr += size/sizeof(int); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + } + + *command_ptr++ = 0; + *command_ptr++ = MI_BATCH_BUFFER_END; + + dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo); + + return VA_STATUS_SUCCESS; +} + +static VAStatus 
+gen8_gpe_process_pipeline_setup(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000); + intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch); + + gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch); + + gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx); + + BEGIN_BATCH(vpp_gpe_ctx->batch, 3); + OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_RELOC(vpp_gpe_ctx->batch, + vpp_gpe_ctx->vpp_batchbuffer.bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + OUT_BATCH(vpp_gpe_ctx->batch, 0); + + ADVANCE_BATCH(vpp_gpe_ctx->batch); + + intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_init(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + dri_bo *bo; + + unsigned int batch_buf_size = vpp_gpe_ctx->thread_num * + (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16; + + vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num; + vpp_gpe_ctx->vpp_kernel_return.size_block = 16; + vpp_gpe_ctx->vpp_kernel_return.pitch = 1; + + unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks + * vpp_gpe_ctx->vpp_kernel_return.size_block; + + dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vpp batch buffer", + batch_buf_size, 0x1000); + vpp_gpe_ctx->vpp_batchbuffer.bo = bo; + dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo); + + dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vpp kernel return buffer", + kernel_return_size, 0x1000); + vpp_gpe_ctx->vpp_kernel_return.bo = bo; + dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo); + + vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_prepare(VADriverContextP ctx, + struct 
vpp_gpe_context *vpp_gpe_ctx) +{ + /*Setup all the memory object*/ + gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx); + gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx); + //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx); + + /*Programing media pipeline*/ + gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process_run(VADriverContextP ctx, + struct vpp_gpe_context *vpp_gpe_ctx) +{ + intel_batchbuffer_flush(vpp_gpe_ctx->batch); + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_gpe_process(VADriverContextP ctx, + struct vpp_gpe_context * vpp_gpe_ctx) +{ + VAStatus va_status = VA_STATUS_SUCCESS; + + va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + + return VA_STATUS_SUCCESS; +} + +static VAStatus +vpp_gpe_process(VADriverContextP ctx, + struct vpp_gpe_context * vpp_gpe_ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + if (IS_HASWELL(i965->intel.device_info)) + return gen75_gpe_process(ctx, vpp_gpe_ctx); + else if (IS_GEN8(i965->intel.device_info)) + return gen8_gpe_process(ctx, vpp_gpe_ctx); + + return VA_STATUS_ERROR_UNIMPLEMENTED; +} + +static VAStatus +vpp_gpe_process_sharpening(VADriverContextP ctx, struct vpp_gpe_context * vpp_gpe_ctx) { VAStatus va_status = VA_STATUS_SUCCESS; @@ -362,9 +648,15 @@ gen75_gpe_process_sharpening(VADriverContextP ctx, if(vpp_gpe_ctx->is_first_frame){ vpp_gpe_ctx->sub_shader_sum = 3; - i965_gpe_load_kernels(ctx, + struct i965_kernel * vpp_kernels; + if (IS_HASWELL(i965->intel.device_info)) + vpp_kernels = gen75_vpp_sharpening_kernels; + else if (IS_GEN8(i965->intel.device_info)) + vpp_kernels = gen8_vpp_sharpening_kernels; + + 
vpp_gpe_ctx->gpe_load_kernels(ctx, &vpp_gpe_ctx->gpe_ctx, - gen75_vpp_sharpening_kernels, + vpp_kernels, vpp_gpe_ctx->sub_shader_sum); } @@ -381,7 +673,7 @@ gen75_gpe_process_sharpening(VADriverContextP ctx, assert(obj_surf); if (obj_surf) { - i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC('N','V','1','2'), + i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); vpp_gpe_ctx->surface_tmp_object = obj_surf; } @@ -416,10 +708,10 @@ gen75_gpe_process_sharpening(VADriverContextP ctx, } vpp_gpe_ctx->sub_shader_index = 0; - va_status = gen75_gpe_process(ctx, vpp_gpe_ctx); + va_status = vpp_gpe_process(ctx, vpp_gpe_ctx); free(vpp_gpe_ctx->thread_param); - /* Step 2: vertical blur process */ + /* Step 2: vertical blur process */ vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object; vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object; vpp_gpe_ctx->forward_surf_sum = 0; @@ -443,7 +735,7 @@ gen75_gpe_process_sharpening(VADriverContextP ctx, } vpp_gpe_ctx->sub_shader_index = 1; - gen75_gpe_process(ctx, vpp_gpe_ctx); + vpp_gpe_process(ctx, vpp_gpe_ctx); free(vpp_gpe_ctx->thread_param); /* Step 3: apply the blur to original surface */ @@ -471,7 +763,7 @@ gen75_gpe_process_sharpening(VADriverContextP ctx, } vpp_gpe_ctx->sub_shader_index = 2; - va_status = gen75_gpe_process(ctx, vpp_gpe_ctx); + va_status = vpp_gpe_process(ctx, vpp_gpe_ctx); free(vpp_gpe_ctx->thread_param); return va_status; @@ -480,7 +772,7 @@ error: return VA_STATUS_ERROR_INVALID_PARAMETER; } -VAStatus gen75_gpe_process_picture(VADriverContextP ctx, +VAStatus vpp_gpe_process_picture(VADriverContextP ctx, struct vpp_gpe_context * vpp_gpe_ctx) { VAStatus va_status = VA_STATUS_SUCCESS; @@ -538,7 +830,7 @@ VAStatus gen75_gpe_process_picture(VADriverContextP ctx, vpp_gpe_ctx->in_frame_h = obj_surface->orig_height; if(filter && filter->type == VAProcFilterSharpening) { - va_status = gen75_gpe_process_sharpening(ctx, vpp_gpe_ctx); + va_status = 
vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx); } else { va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED; } @@ -552,7 +844,7 @@ error: } void -gen75_gpe_context_destroy(VADriverContextP ctx, +vpp_gpe_context_destroy(VADriverContextP ctx, struct vpp_gpe_context *vpp_gpe_ctx) { dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo); @@ -561,7 +853,7 @@ gen75_gpe_context_destroy(VADriverContextP ctx, dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo); vpp_gpe_ctx->vpp_kernel_return.bo = NULL; - i965_gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx); + vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx); if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){ assert(vpp_gpe_ctx->surface_tmp_object != NULL); @@ -576,18 +868,19 @@ gen75_gpe_context_destroy(VADriverContextP ctx, } struct vpp_gpe_context * -gen75_gpe_context_init(VADriverContextP ctx) +vpp_gpe_context_init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context)); struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx); - gpe_ctx->surface_state_binding_table.length = - (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; - gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6; - gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); + assert(IS_HASWELL(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)); - gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH; + vpp_gpe_ctx->surface_tmp = VA_INVALID_ID; + vpp_gpe_ctx->surface_tmp_object = NULL; + vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0); + vpp_gpe_ctx->is_first_frame = 1; gpe_ctx->vfe_state.max_num_threads = 60 - 1; gpe_ctx->vfe_state.num_urb_entries = 16; @@ -595,16 +888,28 @@ gen75_gpe_context_init(VADriverContextP ctx) gpe_ctx->vfe_state.urb_entry_size = 59 - 1; gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1; - vpp_gpe_ctx->vpp_surface2_setup = 
gen7_gpe_surface2_setup; - vpp_gpe_ctx->vpp_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup; - vpp_gpe_ctx->vpp_buffer_surface_setup = gen7_gpe_buffer_suface_setup; - vpp_gpe_ctx->vpp_media_chroma_surface_setup = gen75_gpe_media_chroma_surface_setup; - vpp_gpe_ctx->surface_tmp = VA_INVALID_ID; - vpp_gpe_ctx->surface_tmp_object = NULL; + if (IS_HASWELL(i965->intel.device_info)) { + vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init; + vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy; + vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels; + gpe_ctx->surface_state_binding_table.length = + (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + + gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH; + gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6; + gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); + + } else if (IS_GEN8(i965->intel.device_info)) { + vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init; + vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy; + vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels; + gpe_ctx->surface_state_binding_table.length = + (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + + gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH; + gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6; - vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0); - - vpp_gpe_ctx->is_first_frame = 1; + } return vpp_gpe_ctx; } diff --git a/src/gen75_vpp_gpe.h b/src/gen75_vpp_gpe.h index 5232214..5ffee2c 100644 --- a/src/gen75_vpp_gpe.h +++ b/src/gen75_vpp_gpe.h @@ -79,6 +79,7 @@ struct vpp_gpe_context{ unsigned char * kernel_param; unsigned int kernel_param_size; + unsigned char * thread_param; unsigned int thread_param_size; unsigned int thread_num; @@ -91,46 +92,30 @@ struct vpp_gpe_context{ unsigned int forward_surf_sum; unsigned int backward_surf_sum; - unsigned int x_step; - 
unsigned int y_step; - unsigned int in_frame_w; unsigned int in_frame_h; unsigned int is_first_frame; - void (*vpp_surface2_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); - - void (*vpp_media_rw_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); - - void (*vpp_buffer_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct i965_buffer_surface *buffer_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); - - void (*vpp_media_chroma_surface_setup)(VADriverContextP ctx, - struct i965_gpe_context *gpe_context, - struct object_surface *obj_surface, - unsigned long binding_table_offset, - unsigned long surface_state_offset); + void (*gpe_context_init)(VADriverContextP ctx, + struct i965_gpe_context *gpe_context); + + void (*gpe_context_destroy)(struct i965_gpe_context *gpe_context); + + void (*gpe_load_kernels)(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_kernel *kernel_list, + unsigned int num_kernels); + }; struct vpp_gpe_context * -gen75_gpe_context_init(VADriverContextP ctx); +vpp_gpe_context_init(VADriverContextP ctx); void -gen75_gpe_context_destroy(VADriverContextP ctx, - struct vpp_gpe_context* vpp_context); +vpp_gpe_context_destroy(VADriverContextP ctx, + struct vpp_gpe_context* vpp_context); VAStatus -gen75_gpe_process_picture(VADriverContextP ctx, - struct vpp_gpe_context * vpp_context); +vpp_gpe_process_picture(VADriverContextP ctx, + struct vpp_gpe_context * vpp_context); #endif diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 50df627..1113c90 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -23,6 +23,7 @@ * * Authors: * Li Xiaowei <xiaowei.a.li@intel.com> + * Li 
Zhong <zhong.li@intel.com> */ #include <stdio.h> @@ -52,18 +53,6 @@ i965_DeriveImage(VADriverContextP ctx, VABufferID surface, VAImage *out_image); extern VAStatus i965_DestroyImage(VADriverContextP ctx, VAImageID image); -extern VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); - -extern VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); VAStatus vpp_surface_convert(VADriverContextP ctx, struct object_surface *src_obj_surf, @@ -104,8 +93,8 @@ VAStatus vpp_surface_scaling(VADriverContextP ctx, VAStatus va_status = VA_STATUS_SUCCESS; int flags = I965_PP_FLAG_AVS; - assert(src_obj_surf->fourcc == VA_FOURCC('N','V','1','2')); - assert(dst_obj_surf->fourcc == VA_FOURCC('N','V','1','2')); + assert(src_obj_surf->fourcc == VA_FOURCC_NV12); + assert(dst_obj_surf->fourcc == VA_FOURCC_NV12); VARectangle src_rect, dst_rect; src_rect.x = 0; @@ -130,9 +119,11 @@ VAStatus vpp_surface_scaling(VADriverContextP ctx, void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) { + struct i965_driver_data *i965 = i965_driver_data(ctx); unsigned int* p_table ; int progressive_dn = 1; int dndi_top_first = 0; + int motion_compensated_enable = 0; if (proc_ctx->filters_mask & VPP_DNDI_DI) { VAProcFilterParameterBufferDeinterlacing *di_param = @@ -140,7 +131,8 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c assert(di_param); progressive_dn = 0; - dndi_top_first = !(di_param->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST); + dndi_top_first = !(di_param->flags & VA_DEINTERLACING_BOTTOM_FIELD); + motion_compensated_enable = (di_param->algorithm == VAProcDeinterlacingMotionCompensated); } /* @@ -152,7 +144,9 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c */ p_table = (unsigned int *)proc_ctx->dndi_state_table.ptr; - *p_table ++ = 0; // reserved . 
w0 + if (IS_HASWELL(i965->intel.device_info)) + *p_table ++ = 0; // reserved . w0 + *p_table ++ = ( 140 << 24 | // denoise STAD threshold . w1 192 << 16 | // dnmh_history_max 0 << 12 | // reserved @@ -199,7 +193,7 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c 100<< 16 | // FMD #2 vertical difference th 0 << 14 | // CAT th1 2 << 8 | // FMD tear threshold - 0 << 7 | // MCDI Enable, use motion compensated deinterlace algorithm + motion_compensated_enable << 7 | // MCDI Enable, use motion compensated deinterlace algorithm progressive_dn << 6 | // progressive DN 0 << 4 | // reserved dndi_top_first << 3 | // DN/DI Top First @@ -222,6 +216,8 @@ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_c 13 << 6 | // chr temp diff th 7 ); // chr temp diff low + if (IS_GEN8(i965->intel.device_info)) + *p_table ++ = 0; // parameters for hot pixel, } void hsw_veb_iecp_std_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) @@ -233,40 +229,179 @@ void hsw_veb_iecp_std_table(VADriverContextP ctx, struct intel_vebox_context *pr if(!(proc_ctx->filters_mask & VPP_IECP_STD_STE)){ memset(p_table, 0, 29 * 4); }else{ - *p_table ++ = 0x9a6e39f0; - *p_table ++ = 0x400c0000; - *p_table ++ = 0x00001180; - *p_table ++ = 0xfe2f2e00; - *p_table ++ = 0x000000ff; - - *p_table ++ = 0x00140000; - *p_table ++ = 0xd82e0000; - *p_table ++ = 0x8285ecec; - *p_table ++ = 0x00008282; - *p_table ++ = 0x00000000; - - *p_table ++ = 0x02117000; - *p_table ++ = 0xa38fec96; - *p_table ++ = 0x0000c8c8; - *p_table ++ = 0x00000000; - *p_table ++ = 0x01478000; - - *p_table ++ = 0x0007c306; - *p_table ++ = 0x00000000; - *p_table ++ = 0x00000000; - *p_table ++ = 0x1c1bd000; - *p_table ++ = 0x00000000; - - *p_table ++ = 0x00000000; - *p_table ++ = 0x00000000; - *p_table ++ = 0x0007cf80; - *p_table ++ = 0x00000000; - *p_table ++ = 0x00000000; - - *p_table ++ = 0x1c080000; - *p_table ++ = 0x00000000; - *p_table ++ = 0x00000000; - *p_table ++ = 
0x00000000; + //DWord 0 + *p_table ++ = ( 154 << 24 | // V_Mid + 110 << 16 | // U_Mid + 14 << 10 | // Hue_Max + 31 << 4 | // Sat_Max + 0 << 3 | // Reserved + 0 << 2 | // Output Control is set to output the 1=STD score /0=Output Pixels + 1 << 1 | // Set STE Enable + 1 ); // Set STD Enable + + //DWord 1 + *p_table ++ = ( 0 << 31 | // Reserved + 4 << 28 | // Diamond Margin + 0 << 21 | // Diamond_du + 3 << 18 | // HS_Margin + 79 << 10 | // Cos(alpha) + 0 << 8 | // Reserved + 101 ); // Sin(alpha) + + //DWord 2 + *p_table ++ = ( 0 << 21 | // Reserved + 100 << 13 | // Diamond_alpha + 35 << 7 | // Diamond_Th + 0 ); + + //DWord 3 + *p_table ++ = ( 254 << 24 | // Y_point_3 + 47 << 16 | // Y_point_2 + 46 << 8 | // Y_point_1 + 1 << 7 | // VY_STD_Enable + 0 ); // Reserved + + //DWord 4 + *p_table ++ = ( 0 << 18 | // Reserved + 31 << 13 | // Y_slope_2 + 31 << 8 | // Y_slope_1 + 255 ); // Y_point_4 + + //DWord 5 + *p_table ++ = ( 400 << 16 | // INV_Skin_types_margin = 20* Skin_Type_margin => 20*20 + 3300 ); // INV_Margin_VYL => 1/Margin_VYL + + //DWord 6 + *p_table ++ = ( 216 << 24 | // P1L + 46 << 16 | // P0L + 1600 ); // INV_Margin_VYU + + //DWord 7 + *p_table ++ = ( 130 << 24 | // B1L + 133 << 16 | // B0L + 236 << 8 | // P3L + 236 ); // P2L + + //DWord 8 + *p_table ++ = ( 0 << 27 | // Reserved + 0x7FB << 16 | // S0L (11 bits, Default value: -5 = FBh, pad it with 1s to make it 11bits) + 130 << 8 | // B3L + 130 ); + + //DWord 9 + *p_table ++ = ( 0 << 22 | // Reserved + 0 << 11 | // S2L + 0); // S1L + + //DWord 10 + *p_table ++ = ( 0 << 27 | // Reserved + 66 << 19 | // P1U + 46 << 11 | // P0U + 0 ); // S3 + + //DWord 11 + *p_table ++ = ( 163 << 24 | // B1U + 143 << 16 | // B0U + 236 << 8 | // P3U + 150 ); // P2U + + //DWord 12 + *p_table ++ = ( 0 << 27 | // Reserved + 256 << 16 | // S0U + 200 << 8 | // B3U + 200 ); // B2U + + //DWord 13 + *p_table ++ = ( 0 << 22 | // Reserved + 0x74D << 11 | // S2U (11 bits, Default value -179 = F4Dh) + 113 ); // S1U + + //DWoord 14 + *p_table ++ 
= ( 0 << 28 | // Reserved + 20 << 20 | // Skin_types_margin + 120 << 12 | // Skin_types_thresh + 1 << 11 | // Skin_Types_Enable + 0 ); // S3U + + //DWord 15 + *p_table ++ = ( 0 << 31 | // Reserved + 0x3F8 << 21 | // SATB1 (10 bits, default 8, optimized value -8) + 31 << 14 | // SATP3 + 6 << 7 | // SATP2 + 0x7A ); // SATP1 (7 bits, default 6, optimized value -6) + + //DWord 16 + *p_table ++ = ( 0 << 31 | // Reserved + 297 << 20 | // SATS0 + 124 << 10 | // SATB3 + 8 ); // SATB2 + + //DWord 17 + *p_table ++ = ( 0 << 22 | // Reserved + 297 << 11 | // SATS2 + 85 ); // SATS1 + + //DWord 18 + *p_table ++ = ( 14 << 25 | // HUEP3 + 6 << 18 | // HUEP2 + 0x7A << 11 | // HUEP1 (7 bits, default value -6 = 7Ah) + 256 ); // SATS3 + + //DWord 19 + *p_table ++ = ( 0 << 30 | // Reserved + 256 << 20 | // HUEB3 + 8 << 10 | // HUEB2 + 0x3F8 ); // HUEB1 (10 bits, default value 8, optimized value -8) + + //DWord 20 + *p_table ++ = ( 0 << 22 | // Reserved + 85 << 11 | // HUES1 + 384 ); // HUES + + //DWord 21 + *p_table ++ = ( 0 << 22 | // Reserved + 256 << 11 | // HUES3 + 384 ); // HUES2 + + //DWord 22 + *p_table ++ = ( 0 << 31 | // Reserved + 0 << 21 | // SATB1_DARK + 31 << 14 | // SATP3_DARK + 31 << 7 | // SATP2_DARK + 0x7B ); // SATP1_DARK (7 bits, default value -11 = FF5h, optimized value -5) + + //DWord 23 + *p_table ++ = ( 0 << 31 | // Reserved + 305 << 20 | // SATS0_DARK + 124 << 10 | // SATB3_DARK + 124 ); // SATB2_DARK + + //DWord 24 + *p_table ++ = ( 0 << 22 | // Reserved + 256 << 11 | // SATS2_DARK + 220 ); // SATS1_DARK + + //DWord 25 + *p_table ++ = ( 14 << 25 | // HUEP3_DARK + 14 << 18 | // HUEP2_DARK + 14 << 11 | // HUEP1_DARK + 256 ); // SATS3_DARK + + //DWord 26 + *p_table ++ = ( 0 << 30 | // Reserved + 56 << 20 | // HUEB3_DARK + 56 << 10 | // HUEB2_DARK + 56 ); // HUEB1_DARK + + //DWord 27 + *p_table ++ = ( 0 << 22 | // Reserved + 256 << 11 | // HUES1_DARK + 256 ); // HUES0_DARK + + //DWord 28 + *p_table ++ = ( 0 << 22 | // Reserved + 256 << 11 | // HUES3_DARK + 256 ); 
// HUES2_DARK } } @@ -389,11 +524,11 @@ void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context *pr return; } - if(proc_ctx->fourcc_input == VA_FOURCC('R','G','B','A') && - (proc_ctx->fourcc_output == VA_FOURCC('N','V','1','2') || - proc_ctx->fourcc_output == VA_FOURCC('Y','V','1','2') || - proc_ctx->fourcc_output == VA_FOURCC('Y','V','Y','2') || - proc_ctx->fourcc_output == VA_FOURCC('A','Y','U','V'))) { + if(proc_ctx->fourcc_input == VA_FOURCC_RGBA && + (proc_ctx->fourcc_output == VA_FOURCC_NV12 || + proc_ctx->fourcc_output == VA_FOURCC_YV12 || + proc_ctx->fourcc_output == VA_FOURCC_YVY2 || + proc_ctx->fourcc_output == VA_FOURCC_AYUV)) { tran_coef[0] = 0.257; tran_coef[1] = 0.504; @@ -410,12 +545,11 @@ void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context *pr u_coef[2] = 128 * 4; is_transform_enabled = 1; - }else if((proc_ctx->fourcc_input == VA_FOURCC('N','V','1','2') || - proc_ctx->fourcc_input == VA_FOURCC('Y','V','1','2') || - proc_ctx->fourcc_input == VA_FOURCC('Y','U','Y','2') || - proc_ctx->fourcc_input == VA_FOURCC('A','Y','U','V'))&& - proc_ctx->fourcc_output == VA_FOURCC('R','G','B','A')) { - + }else if((proc_ctx->fourcc_input == VA_FOURCC_NV12 || + proc_ctx->fourcc_input == VA_FOURCC_YV12 || + proc_ctx->fourcc_input == VA_FOURCC_YUY2 || + proc_ctx->fourcc_input == VA_FOURCC_AYUV) && + proc_ctx->fourcc_output == VA_FOURCC_RGBA) { tran_coef[0] = 1.164; tran_coef[1] = 0.000; tran_coef[2] = 1.569; @@ -543,7 +677,8 @@ void hsw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *pro if (di_param->algorithm == VAProcDeinterlacingBob) is_first_frame = 1; - if (di_param->algorithm == VAProcDeinterlacingMotionAdaptive && + if ((di_param->algorithm == VAProcDeinterlacingMotionAdaptive || + di_param->algorithm == VAProcDeinterlacingMotionCompensated) && proc_ctx->frame_order != -1) di_output_frames_flag = 0; /* Output both Current Frame and Previous Frame */ } @@ -750,7 +885,7 @@ void 
hsw_veb_resource_prepare(VADriverContextP ctx, } if(obj_surf_in->bo == NULL){ - input_fourcc = VA_FOURCC('N','V','1','2'); + input_fourcc = VA_FOURCC_NV12; input_sampling = SUBSAMPLE_YUV420; input_tiling = 0; i965_check_alloc_surface_bo(ctx, obj_surf_in, input_tiling, input_fourcc, input_sampling); @@ -762,7 +897,7 @@ void hsw_veb_resource_prepare(VADriverContextP ctx, } if(obj_surf_out->bo == NULL){ - output_fourcc = VA_FOURCC('N','V','1','2'); + output_fourcc = VA_FOURCC_NV12; output_sampling = SUBSAMPLE_YUV420; output_tiling = 0; i965_check_alloc_surface_bo(ctx, obj_surf_out, output_tiling, output_fourcc, output_sampling); @@ -844,8 +979,9 @@ void hsw_veb_resource_prepare(VADriverContextP ctx, } -void hsw_veb_surface_reference(VADriverContextP ctx, - struct intel_vebox_context *proc_ctx) +static VAStatus +hsw_veb_surface_reference(VADriverContextP ctx, + struct intel_vebox_context *proc_ctx) { struct object_surface * obj_surf; VEBFrameStore tmp_store; @@ -870,7 +1006,8 @@ void hsw_veb_surface_reference(VADriverContextP ctx, (VAProcFilterParameterBufferDeinterlacing *)proc_ctx->filter_di; if (di_param && - di_param->algorithm == VAProcDeinterlacingMotionAdaptive) { + (di_param->algorithm == VAProcDeinterlacingMotionAdaptive || + di_param->algorithm == VAProcDeinterlacingMotionCompensated)) { if ((proc_ctx->filters_mask & VPP_DNDI_DN) && proc_ctx->frame_order == 0) { /* DNDI */ tmp_store = proc_ctx->frame_store[FRAME_OUT_CURRENT_DN]; @@ -880,9 +1017,14 @@ void hsw_veb_surface_reference(VADriverContextP ctx, VAProcPipelineParameterBuffer *pipe = proc_ctx->pipeline_param; struct object_surface *obj_surf = NULL; struct i965_driver_data * const i965 = i965_driver_data(ctx); - - assert(pipe->num_forward_references == 1); - assert(pipe->forward_references[0] != VA_INVALID_ID); + + if (!pipe || + !pipe->num_forward_references || + pipe->forward_references[0] == VA_INVALID_ID) { + WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated 
deinterlacing !!!\n"); + + return VA_STATUS_ERROR_INVALID_PARAMETER; + } obj_surf = SURFACE(pipe->forward_references[0]); assert(obj_surf && obj_surf->bo); @@ -919,7 +1061,8 @@ void hsw_veb_surface_reference(VADriverContextP ctx, (VAProcFilterParameterBufferDeinterlacing *)proc_ctx->filter_di; if (di_param && - di_param->algorithm == VAProcDeinterlacingMotionAdaptive) { + (di_param->algorithm == VAProcDeinterlacingMotionAdaptive || + di_param->algorithm == VAProcDeinterlacingMotionCompensated)) { if (proc_ctx->frame_order == -1) { proc_ctx->frame_store[FRAME_OUT_CURRENT].surface_id = VA_INVALID_ID; proc_ctx->frame_store[FRAME_OUT_CURRENT].is_internal_surface = 0; @@ -946,6 +1089,8 @@ void hsw_veb_surface_reference(VADriverContextP ctx, proc_ctx->frame_store[FRAME_OUT_CURRENT].obj_surface = obj_surf; proc_ctx->current_output = FRAME_OUT_CURRENT; } + + return VA_STATUS_SUCCESS; } void hsw_veb_surface_unreference(VADriverContextP ctx, @@ -999,17 +1144,17 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, } /* convert the following format to NV12 format */ - if(obj_surf_input->fourcc == VA_FOURCC('Y','V','1','2') || - obj_surf_input->fourcc == VA_FOURCC('I','4','2','0') || - obj_surf_input->fourcc == VA_FOURCC('I','M','C','1') || - obj_surf_input->fourcc == VA_FOURCC('I','M','C','3') || - obj_surf_input->fourcc == VA_FOURCC('R','G','B','A')){ + if(obj_surf_input->fourcc == VA_FOURCC_YV12 || + obj_surf_input->fourcc == VA_FOURCC_I420 || + obj_surf_input->fourcc == VA_FOURCC_IMC1 || + obj_surf_input->fourcc == VA_FOURCC_IMC3 || + obj_surf_input->fourcc == VA_FOURCC_RGBA){ proc_ctx->format_convert_flags |= PRE_FORMAT_CONVERT; - } else if(obj_surf_input->fourcc == VA_FOURCC('A','Y','U','V') || - obj_surf_input->fourcc == VA_FOURCC('Y','U','Y','2') || - obj_surf_input->fourcc == VA_FOURCC('N','V','1','2')){ + } else if(obj_surf_input->fourcc == VA_FOURCC_AYUV || + obj_surf_input->fourcc == VA_FOURCC_YUY2 || + obj_surf_input->fourcc == VA_FOURCC_NV12){ // nothing to do 
here } else { /* not support other format as input */ @@ -1030,7 +1175,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, if (obj_surf_input_vebox) { proc_ctx->surface_input_vebox_object = obj_surf_input_vebox; - i965_check_alloc_surface_bo(ctx, obj_surf_input_vebox, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surf_input_vebox, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); } } @@ -1038,16 +1183,16 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, } /* create one temporary NV12 surfaces for conversion*/ - if(obj_surf_output->fourcc == VA_FOURCC('Y','V','1','2') || - obj_surf_output->fourcc == VA_FOURCC('I','4','2','0') || - obj_surf_output->fourcc == VA_FOURCC('I','M','C','1') || - obj_surf_output->fourcc == VA_FOURCC('I','M','C','3') || - obj_surf_output->fourcc == VA_FOURCC('R','G','B','A')) { + if(obj_surf_output->fourcc == VA_FOURCC_YV12 || + obj_surf_output->fourcc == VA_FOURCC_I420 || + obj_surf_output->fourcc == VA_FOURCC_IMC1 || + obj_surf_output->fourcc == VA_FOURCC_IMC3 || + obj_surf_output->fourcc == VA_FOURCC_RGBA) { proc_ctx->format_convert_flags |= POST_FORMAT_CONVERT; - } else if(obj_surf_output->fourcc == VA_FOURCC('A','Y','U','V') || - obj_surf_output->fourcc == VA_FOURCC('Y','U','Y','2') || - obj_surf_output->fourcc == VA_FOURCC('N','V','1','2')){ + } else if(obj_surf_output->fourcc == VA_FOURCC_AYUV || + obj_surf_output->fourcc == VA_FOURCC_YUY2 || + obj_surf_output->fourcc == VA_FOURCC_NV12){ /* Nothing to do here */ } else { /* not support other format as input */ @@ -1069,7 +1214,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, if (obj_surf_output_vebox) { proc_ctx->surface_output_vebox_object = obj_surf_output_vebox; - i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); } } } @@ -1088,7 +1233,7 @@ int 
hsw_veb_pre_format_convert(VADriverContextP ctx, if (obj_surf_output_vebox) { proc_ctx->surface_output_scaled_object = obj_surf_output_vebox; - i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surf_output_vebox, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); } } } @@ -1118,7 +1263,7 @@ int hsw_veb_post_format_convert(VADriverContextP ctx, } else if(proc_ctx->format_convert_flags & POST_SCALING_CONVERT) { /* scaling, convert and copy NV12 to YV12/IMC3/IMC2/RGBA output*/ - assert(obj_surface->fourcc == VA_FOURCC('N','V','1','2')); + assert(obj_surface->fourcc == VA_FOURCC_NV12); /* first step :surface scaling */ vpp_surface_scaling(ctx,proc_ctx->surface_output_scaled_object, obj_surface); @@ -1126,13 +1271,13 @@ int hsw_veb_post_format_convert(VADriverContextP ctx, /* second step: color format convert and copy to output */ obj_surface = proc_ctx->surface_output_object; - if(obj_surface->fourcc == VA_FOURCC('N','V','1','2') || - obj_surface->fourcc == VA_FOURCC('Y','V','1','2') || - obj_surface->fourcc == VA_FOURCC('I','4','2','0') || - obj_surface->fourcc == VA_FOURCC('Y','U','Y','2') || - obj_surface->fourcc == VA_FOURCC('I','M','C','1') || - obj_surface->fourcc == VA_FOURCC('I','M','C','3') || - obj_surface->fourcc == VA_FOURCC('R','G','B','A')) { + if(obj_surface->fourcc == VA_FOURCC_NV12 || + obj_surface->fourcc == VA_FOURCC_YV12 || + obj_surface->fourcc == VA_FOURCC_I420 || + obj_surface->fourcc == VA_FOURCC_YUY2 || + obj_surface->fourcc == VA_FOURCC_IMC1 || + obj_surface->fourcc == VA_FOURCC_IMC3 || + obj_surface->fourcc == VA_FOURCC_RGBA) { vpp_surface_convert(ctx, proc_ctx->surface_output_object, proc_ctx->surface_output_scaled_object); }else { assert(0); @@ -1172,6 +1317,9 @@ VAStatus gen75_vebox_process_picture(VADriverContextP ctx, proc_ctx->filters_mask |= VPP_IECP_PRO_AMP; proc_ctx->filter_iecp_amp = filter; proc_ctx->filter_iecp_amp_num_elements = 
obj_buf->num_elements; + } else if (filter->type == VAProcFilterSkinToneEnhancement) { + proc_ctx->filters_mask |= VPP_IECP_STD_STE; + proc_ctx->filter_iecp_std = filter; } } @@ -1297,3 +1445,215 @@ struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx) return proc_context; } +void bdw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) +{ + struct intel_batchbuffer *batch = proc_ctx->batch; + unsigned int is_dn_enabled = (proc_ctx->filters_mask & 0x01)? 1: 0; + unsigned int is_di_enabled = (proc_ctx->filters_mask & 0x02)? 1: 0; + unsigned int is_iecp_enabled = (proc_ctx->filters_mask & 0xff00)?1:0; + unsigned int is_first_frame = !!((proc_ctx->frame_order == -1) && + (is_di_enabled || + is_dn_enabled)); + unsigned int di_output_frames_flag = 2; /* Output Current Frame Only */ + + if(proc_ctx->fourcc_input != proc_ctx->fourcc_output || + (is_dn_enabled == 0 && is_di_enabled == 0)){ + is_iecp_enabled = 1; + } + + if (is_di_enabled) { + VAProcFilterParameterBufferDeinterlacing *di_param = + (VAProcFilterParameterBufferDeinterlacing *)proc_ctx->filter_di; + + assert(di_param); + + if (di_param->algorithm == VAProcDeinterlacingBob) + is_first_frame = 1; + + if ((di_param->algorithm == VAProcDeinterlacingMotionAdaptive || + di_param->algorithm == VAProcDeinterlacingMotionCompensated) && + proc_ctx->frame_order != -1) + di_output_frames_flag = 0; /* Output both Current Frame and Previous Frame */ + } + + BEGIN_VEB_BATCH(batch, 0xc); + OUT_VEB_BATCH(batch, VEB_STATE | (0xc - 2)); + OUT_VEB_BATCH(batch, + 0 << 25 | // state surface control bits + 0 << 23 | // reserved. + 0 << 22 | // gamut expansion position + 0 << 15 | // reserved. 
+ 0 << 14 | // single slice vebox enable + 0 << 13 | // hot pixel filter enable + 0 << 12 | // alpha plane enable + 0 << 11 | // vignette enable + 0 << 10 | // demosaic enable + di_output_frames_flag << 8 | // DI output frame + 1 << 7 | // 444->422 downsample method + 1 << 6 | // 422->420 downsample method + is_first_frame << 5 | // DN/DI first frame + is_di_enabled << 4 | // DI enable + is_dn_enabled << 3 | // DN enable + is_iecp_enabled << 2 | // global IECP enabled + 0 << 1 | // ColorGamutCompressionEnable + 0 ) ; // ColorGamutExpansionEnable. + + OUT_RELOC(batch, + proc_ctx->dndi_state_table.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_VEB_BATCH(batch, 0); + + OUT_RELOC(batch, + proc_ctx->iecp_state_table.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_VEB_BATCH(batch, 0); + + OUT_RELOC(batch, + proc_ctx->gamut_state_table.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_VEB_BATCH(batch, 0); + + OUT_RELOC(batch, + proc_ctx->vertex_state_table.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_VEB_BATCH(batch, 0); + + OUT_VEB_BATCH(batch, 0);/*caputre pipe state pointer*/ + OUT_VEB_BATCH(batch, 0); + + ADVANCE_VEB_BATCH(batch); +} + +void bdw_veb_dndi_iecp_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) +{ + struct intel_batchbuffer *batch = proc_ctx->batch; + unsigned char frame_ctrl_bits = 0; + unsigned int startingX = 0; + unsigned int endingX = (proc_ctx->width_input + 63 ) / 64 * 64; + + BEGIN_VEB_BATCH(batch, 0x14); + OUT_VEB_BATCH(batch, VEB_DNDI_IECP_STATE | (0x14 - 2));//DWord 0 + OUT_VEB_BATCH(batch, + startingX << 16 | + endingX -1);//DWord 1 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_IN_CURRENT].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, 0, frame_ctrl_bits);//DWord 2 + OUT_VEB_BATCH(batch,0);//DWord 3 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_IN_PREVIOUS].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, 0, frame_ctrl_bits);//DWord 4 + OUT_VEB_BATCH(batch,0);//DWord 5 + + OUT_RELOC(batch, + 
proc_ctx->frame_store[FRAME_IN_STMM].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, 0, frame_ctrl_bits);//DWord 6 + OUT_VEB_BATCH(batch,0);//DWord 7 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_STMM].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 8 + OUT_VEB_BATCH(batch,0);//DWord 9 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_CURRENT_DN].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 10 + OUT_VEB_BATCH(batch,0);//DWord 11 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_CURRENT].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 12 + OUT_VEB_BATCH(batch,0);//DWord 13 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_PREVIOUS].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 14 + OUT_VEB_BATCH(batch,0);//DWord 15 + + OUT_RELOC(batch, + proc_ctx->frame_store[FRAME_OUT_STATISTIC].obj_surface->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);//DWord 16 + OUT_VEB_BATCH(batch,0);//DWord 17 + + OUT_VEB_BATCH(batch,0);//DWord 18 + OUT_VEB_BATCH(batch,0);//DWord 19 + + ADVANCE_VEB_BATCH(batch); +} + +VAStatus gen8_vebox_process_picture(VADriverContextP ctx, + struct intel_vebox_context *proc_ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + VAProcPipelineParameterBuffer *pipe = proc_ctx->pipeline_param; + VAProcFilterParameterBuffer* filter = NULL; + struct object_buffer *obj_buf = NULL; + unsigned int i; + + for (i = 0; i < pipe->num_filters; i ++) { + obj_buf = BUFFER(pipe->filters[i]); + + assert(obj_buf && obj_buf->buffer_store); + + if (!obj_buf || !obj_buf->buffer_store) + goto error; + + filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer; + + if (filter->type == VAProcFilterNoiseReduction) { + proc_ctx->filters_mask |= VPP_DNDI_DN; + proc_ctx->filter_dn = filter; + } else if (filter->type == 
VAProcFilterDeinterlacing) { + proc_ctx->filters_mask |= VPP_DNDI_DI; + proc_ctx->filter_di = filter; + } else if (filter->type == VAProcFilterColorBalance) { + proc_ctx->filters_mask |= VPP_IECP_PRO_AMP; + proc_ctx->filter_iecp_amp = filter; + proc_ctx->filter_iecp_amp_num_elements = obj_buf->num_elements; + } else if (filter->type == VAProcFilterSkinToneEnhancement) { + proc_ctx->filters_mask |= VPP_IECP_STD_STE; + proc_ctx->filter_iecp_std = filter; + } + } + + hsw_veb_pre_format_convert(ctx, proc_ctx); + hsw_veb_surface_reference(ctx, proc_ctx); + + if (proc_ctx->frame_order == -1) { + hsw_veb_resource_prepare(ctx, proc_ctx); + } + + if (proc_ctx->format_convert_flags & POST_COPY_CONVERT) { + assert(proc_ctx->frame_order == 1); + /* directly copy the saved frame in the second call */ + } else { + intel_batchbuffer_start_atomic_veb(proc_ctx->batch, 0x1000); + intel_batchbuffer_emit_mi_flush(proc_ctx->batch); + hsw_veb_surface_state(ctx, proc_ctx, INPUT_SURFACE); + hsw_veb_surface_state(ctx, proc_ctx, OUTPUT_SURFACE); + hsw_veb_state_table_setup(ctx, proc_ctx); + + bdw_veb_state_command(ctx, proc_ctx); + bdw_veb_dndi_iecp_command(ctx, proc_ctx); + intel_batchbuffer_end_atomic(proc_ctx->batch); + intel_batchbuffer_flush(proc_ctx->batch); + } + + hsw_veb_post_format_convert(ctx, proc_ctx); + // hsw_veb_surface_unreference(ctx, proc_ctx); + + proc_ctx->frame_order = (proc_ctx->frame_order + 1) % 2; + + return VA_STATUS_SUCCESS; + +error: + return VA_STATUS_ERROR_INVALID_PARAMETER; +} + diff --git a/src/gen75_vpp_vebox.h b/src/gen75_vpp_vebox.h index f1061c7..a78a165 100644 --- a/src/gen75_vpp_vebox.h +++ b/src/gen75_vpp_vebox.h @@ -150,4 +150,7 @@ void gen75_vebox_context_destroy(VADriverContextP ctx, struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx); +VAStatus gen8_vebox_process_picture(VADriverContextP ctx, + struct intel_vebox_context *proc_ctx); + #endif diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index 8572b89..78b1096 100644 --- 
a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -41,12 +41,16 @@ #include "gen6_mfc.h" #include "gen6_vme.h" +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) + extern void gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern void gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context); + struct intel_encoder_context *encoder_context); extern void gen6_mfc_init(VADriverContextP ctx, struct encode_state *encode_state, @@ -208,13 +212,13 @@ gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); - /*DW1 frame size */ + /*DW1 frame size */ OUT_BCS_BATCH(batch, - ((width_in_mbs * height_in_mbs) & 0xFFFF)); + ((width_in_mbs * height_in_mbs - 1) & 0xFFFF)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); - /*DW3 Qp setting */ + /*DW3 Qp setting */ OUT_BCS_BATCH(batch, (0 << 24) | /* Second Chroma QP Offset */ (0 << 16) | /* Chroma QP Offset */ @@ -240,20 +244,20 @@ gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, (1 << 2) | /* Frame MB only flag */ (0 << 1) | /* MBAFF mode is in active */ (0 << 0)); /* Field picture flag */ - /*DW5 trequllis quantization */ + /*DW5 trequllis quantization */ OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ (0xBB8 << 16) | /* InterMbMaxSz */ (0xEE8) ); /* IntraMbMaxSz */ - /* DW7 */ + /* DW7 */ OUT_BCS_BATCH(batch, 0); /* Reserved */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta 
for bitrate control */ - /* DW10 frame bit setting */ + /* DW10 frame bit setting */ OUT_BCS_BATCH(batch, 0x8C000000); OUT_BCS_BATCH(batch, 0x00010000); OUT_BCS_BATCH(batch, 0); - /* DW13 Ref setting */ + /* DW13 Ref setting */ OUT_BCS_BATCH(batch, 0x02010100); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -373,17 +377,19 @@ va_to_gen7_mpeg2_picture_type[3] = { static void gen7_mfc_mpeg2_pic_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - struct encode_state *encode_state) + struct intel_encoder_context *encoder_context, + struct encode_state *encode_state) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncPictureParameterBufferMPEG2 *pic_param; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer); pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; BEGIN_BCS_BATCH(batch, 13); OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2)); @@ -408,7 +414,12 @@ gen7_mfc_mpeg2_pic_state(VADriverContextP ctx, 1 << 31 | /* slice concealment */ (height_in_mbs - 1) << 16 | (width_in_mbs - 1)); - OUT_BCS_BATCH(batch, 0); + + if (slice_param && slice_param->quantiser_scale_code >= 14) + OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12)); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0xFFF << 16 | /* InterMBMaxSize */ @@ -427,7 +438,7 @@ static void gen7_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned char intra_qm[64] = { - 8, 16, 19, 22, 26, 27, 29, 34, + 8, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, 19, 22, 26, 
27, 29, 34, 34, 38, 22, 22, 26, 27, 29, 34, 37, 40, @@ -456,14 +467,14 @@ static void gen7_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned short intra_fqm[64] = { - 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, - 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, - 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, - 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, - 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, - 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, - 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, - 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, + 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, + 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, + 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, + 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, + 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, + 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, + 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, + 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, }; unsigned short non_intra_fqm[64] = { @@ -483,14 +494,14 @@ gen7_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *enc static void gen7_mfc_mpeg2_slicegroup_state(VADriverContextP ctx, - struct 
intel_encoder_context *encoder_context, - int x, int y, - int next_x, int next_y, - int is_fisrt_slice_group, - int is_last_slice_group, - int intra_slice, - int qp, - struct intel_batchbuffer *batch) + struct intel_encoder_context *encoder_context, + int x, int y, + int next_x, int next_y, + int is_fisrt_slice_group, + int is_last_slice_group, + int intra_slice, + int qp, + struct intel_batchbuffer *batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -530,18 +541,18 @@ gen7_mfc_mpeg2_slicegroup_state(VADriverContextP ctx, static int gen7_mfc_mpeg2_pak_object_intra(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - int x, int y, - int first_mb_in_slice, - int last_mb_in_slice, - int first_mb_in_slice_group, - int last_mb_in_slice_group, - int mb_type, - int qp_scale_code, - int coded_block_pattern, - unsigned char target_size_in_word, - unsigned char max_size_in_word, - struct intel_batchbuffer *batch) + struct intel_encoder_context *encoder_context, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int mb_type, + int qp_scale_code, + int coded_block_pattern, + unsigned char target_size_in_word, + unsigned char max_size_in_word, + struct intel_batchbuffer *batch) { int len_in_dwords = 9; @@ -627,19 +638,19 @@ mpeg2_motion_vector(int mv, int pos, int display_max, int f_code) static int gen7_mfc_mpeg2_pak_object_inter(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - unsigned int *msg, - int width_in_mbs, int height_in_mbs, - int x, int y, - int first_mb_in_slice, - int last_mb_in_slice, - int first_mb_in_slice_group, - int last_mb_in_slice_group, - int qp_scale_code, - unsigned char target_size_in_word, - unsigned char max_size_in_word, - struct intel_batchbuffer *batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned int *msg, + 
int width_in_mbs, int height_in_mbs, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int qp_scale_code, + unsigned char target_size_in_word, + unsigned char max_size_in_word, + struct intel_batchbuffer *batch) { VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; int len_in_dwords = 9; @@ -697,9 +708,9 @@ gen7_mfc_mpeg2_pak_object_inter(VADriverContextP ctx, static void gen7_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS); @@ -751,11 +762,11 @@ gen7_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx, static void gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - VAEncSliceParameterBufferMPEG2 *next_slice_group_param, - struct intel_batchbuffer *slice_batch) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + VAEncSliceParameterBufferMPEG2 *next_slice_group_param, + struct intel_batchbuffer *slice_batch) { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -787,16 +798,16 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, } gen7_mfc_mpeg2_slicegroup_state(ctx, - encoder_context, - h_start_pos, - v_start_pos, - h_next_start_pos, - v_next_start_pos, - slice_index == 0, - next_slice_group_param == NULL, - slice_param->is_intra_slice, - 
slice_param->quantiser_scale_code, - slice_batch); + encoder_context, + h_start_pos, + v_start_pos, + h_next_start_pos, + v_next_start_pos, + slice_index == 0, + next_slice_group_param == NULL, + slice_param->is_intra_slice, + slice_param->quantiser_scale_code, + slice_batch); if (slice_index == 0) gen7_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); @@ -826,36 +837,36 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, if (slice_param->is_intra_slice) { gen7_mfc_mpeg2_pak_object_intra(ctx, - encoder_context, - h_pos, v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - 0x1a, - slice_param->quantiser_scale_code, - 0x3f, - 0, - 0xff, - slice_batch); + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); } else { msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block); if(msg[32] & INTRA_MB_FLAG_MASK) { - gen7_mfc_mpeg2_pak_object_intra(ctx, - encoder_context, - h_pos, v_pos, - first_mb_in_slice, - last_mb_in_slice, - first_mb_in_slice_group, - last_mb_in_slice_group, - 0x1a, - slice_param->quantiser_scale_code, - 0x3f, - 0, - 0xff, - slice_batch); - } else { + gen7_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); + } else { gen7_mfc_mpeg2_pak_object_inter(ctx, encode_state, @@ -871,8 +882,8 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, 0, 0xff, slice_batch); - } - } + } + } } slice_param++; @@ -913,21 +924,16 @@ gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, */ static dri_bo * gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, - struct 
encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { - struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *batch; - VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; dri_bo *batch_bo; int i; - int buffer_size; - int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; - int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - buffer_size = width_in_mbs * height_in_mbs * 64; - batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size); + batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; for (i = 0; i < encode_state->num_slice_params_ext; i++) { @@ -948,14 +954,15 @@ gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, dri_bo_reference(batch_bo); intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; return batch_bo; } static void gen7_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; @@ -971,8 +978,8 @@ gen7_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx, static void gen7_mfc_mpeg2_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; dri_bo *slice_batch_bo; @@ -1002,8 +1009,8 @@ gen7_mfc_mpeg2_pipeline_programing(VADriverContextP ctx, static VAStatus 
gen7_mfc_mpeg2_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct object_surface *obj_surface; @@ -1015,7 +1022,7 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx, /* reconstructed surface */ obj_surface = encode_state->reconstructed_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); mfc_context->pre_deblocking_output.bo = obj_surface->bo; dri_bo_reference(mfc_context->pre_deblocking_output.bo); mfc_context->surface_state.width = obj_surface->orig_width; @@ -1069,7 +1076,7 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx, dri_bo_map(bo, 1); coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; - coded_buffer_segment->codec = CODED_MPEG2; + coded_buffer_segment->codec = encoder_context->codec; dri_bo_unmap(bo); return vaStatus; @@ -1077,8 +1084,8 @@ gen7_mfc_mpeg2_prepare(VADriverContextP ctx, static VAStatus gen7_mfc_mpeg2_encode_picture(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { gen6_mfc_init(ctx, encode_state, encoder_context); gen7_mfc_mpeg2_prepare(ctx, encode_state, encoder_context); @@ -1098,7 +1105,7 @@ gen7_mfc_pipeline(VADriverContextP ctx, VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context); diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 8e0d503..bfb95bf 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -65,6 
+65,7 @@ gen7_mfd_init_avc_surface(VADriverContextP ctx, if (!gen7_avc_surface) { gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); + gen7_avc_surface->frame_store_id = -1; assert((obj_surface->size & 0x3f) == 0); obj_surface->private_data = gen7_avc_surface; } @@ -135,12 +136,16 @@ gen7_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -148,7 +153,7 @@ gen7_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ @@ -353,7 +358,7 @@ gen7_mfd_avc_img_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); OUT_BCS_BATCH(batch, - width_in_mbs * height_in_mbs); + (width_in_mbs * height_in_mbs - 1)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); @@ -425,7 +430,7 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx, struct object_surface *obj_surface; GenAvcSurface *gen7_avc_surface; VAPictureH264 *va_pic; - int i, j; + int i; BEGIN_BCS_BATCH(batch, 69); OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2)); @@ -477,26 +482,14 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx, /* POC List */ for (i = 0; i < 
ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { - if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) { - int found = 0; - - assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL); - - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; - if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) { - found = 1; - break; - } - } + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - assert(found == 1); - assert(!(va_pic->flags & VA_PICTURE_H264_INVALID)); - + assert(va_pic != NULL); OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { @@ -513,6 +506,15 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx, } static void +gen7_mfd_avc_phantom_slice_first(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch); +} + +static void gen7_mfd_avc_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, VASliceParameterBufferH264 *slice_param, @@ -748,7 +750,8 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface); + intel_update_avc_frame_store_index(ctx, decode_state, pic_param, + gen7_mfd_context->reference_surface, &gen7_mfd_context->fs_ctx); width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; 
height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ @@ -756,20 +759,12 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); @@ -856,6 +851,9 @@ gen7_mfd_avc_decode_picture(VADriverContextP ctx, else next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + if (j == 0 && slice_param->first_mb_in_slice) + gen7_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); assert((slice_param->slice_type == SLICE_TYPE_I) || @@ -906,7 +904,7 @@ gen7_mfd_mpeg2_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + 
i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; @@ -942,7 +940,7 @@ gen7_mfd_mpeg2_pic_state(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer; - if (IS_HASWELL(i965->intel.device_id)) { + if (IS_HASWELL(i965->intel.device_info)) { /* XXX: disable concealment for now */ slice_concealment_disable_bit = 1; } @@ -1040,10 +1038,35 @@ gen7_mfd_mpeg2_qm_state(VADriverContextP ctx, } } +uint32_t mpeg2_get_slice_data_length(dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *slice_param) +{ + uint8_t *buf; + uint32_t buf_offset = slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3); + uint32_t buf_size = slice_param->slice_data_size - (slice_param->macroblock_offset >> 3); + uint32_t i; + + dri_bo_map(slice_data_bo, 0); + buf = (uint8_t *)slice_data_bo->virtual + buf_offset; + + for (i = 3; i < buf_size; i++) { + if (buf[i - 3] && + !buf[i - 2] && + !buf[i - 1] && + !buf[i]) { + dri_bo_unmap(slice_data_bo); + return i - 3 + 1; + } + } + + dri_bo_unmap(slice_data_bo); + return buf_size; +} + static void gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx, VAPictureParameterBufferMPEG2 *pic_param, VASliceParameterBufferMPEG2 *slice_param, + dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *next_slice_param, struct gen7_mfd_context *gen7_mfd_context) { @@ -1074,7 +1097,7 @@ gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 5); OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2)); OUT_BCS_BATCH(batch, - slice_param->slice_data_size - (slice_param->macroblock_offset >> 3)); + mpeg2_get_slice_data_length(slice_data_bo, slice_param)); OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3)); OUT_BCS_BATCH(batch, @@ -1086,7 +1109,7 @@ 
gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx, (slice_param->macroblock_offset & 0x7)); OUT_BCS_BATCH(batch, (slice_param->quantiser_scale_code << 24) | - (IS_HASWELL(i965->intel.device_id) ? (vpos1 << 8 | hpos1) : 0)); + (IS_HASWELL(i965->intel.device_info) ? (vpos1 << 8 | hpos1) : 0)); ADVANCE_BCS_BATCH(batch); } @@ -1137,7 +1160,7 @@ gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx, else next_slice_param = next_slice_group_param; - gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context); + gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context); slice_param++; } } @@ -1247,7 +1270,7 @@ gen7_mfd_vc1_decode_init(VADriverContextP ctx, /* Current decoded picture */ obj_surface = decode_state->render_object; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); @@ -1798,13 +1821,13 @@ gen7_mfd_jpeg_decode_init(VADriverContextP ctx, struct object_surface *obj_surface; VAPictureParameterBufferJPEGBaseline *pic_param; int subsampling = SUBSAMPLE_YUV420; - int fourcc = VA_FOURCC('I', 'M', 'C', '3'); + int fourcc = VA_FOURCC_IMC3; pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; if (pic_param->num_components == 1) { subsampling = SUBSAMPLE_YUV400; - fourcc = VA_FOURCC('Y', '8', '0', '0'); + fourcc = VA_FOURCC_Y800; } else if (pic_param->num_components == 3) { int h1 = pic_param->components[0].h_sampling_factor; int h2 = pic_param->components[1].h_sampling_factor; @@ -1816,31 +1839,31 @@ gen7_mfd_jpeg_decode_init(VADriverContextP ctx, if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV420; - fourcc = VA_FOURCC('I', 'M', 'C', '3'); + fourcc = VA_FOURCC_IMC3; } 
else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h1 == 1 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV444; - fourcc = VA_FOURCC('4', '4', '4', 'P'); + fourcc = VA_FOURCC_444P; } else if (h1 == 4 && h2 == 1 && h3 == 1 && v1 == 1 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV411; - fourcc = VA_FOURCC('4', '1', '1', 'P'); + fourcc = VA_FOURCC_411P; } else if (h1 == 1 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + fourcc = VA_FOURCC_422V; } else if (h1 == 2 && h2 == 1 && h3 == 1 && v1 == 2 && v2 == 2 && v3 == 2) { subsampling = SUBSAMPLE_YUV422H; - fourcc = VA_FOURCC('4', '2', '2', 'H'); + fourcc = VA_FOURCC_422H; } else if (h2 == 2 && h2 == 2 && h3 == 2 && v1 == 2 && v2 == 1 && v3 == 1) { subsampling = SUBSAMPLE_YUV422V; - fourcc = VA_FOURCC('4', '2', '2', 'V'); + fourcc = VA_FOURCC_422V; } else assert(0); } else { @@ -2091,18 +2114,6 @@ gen7_mfd_jpeg_bsd_object(VADriverContextP ctx, /* Workaround for JPEG decoding on Ivybridge */ -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); -VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - static struct { int width; int height; @@ -2145,7 +2156,7 @@ gen7_jpeg_wa_init(VADriverContextP ctx, obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); gen7_mfd_context->jpeg_wa_surface_object = obj_surface; if (!gen7_mfd_context->jpeg_wa_slice_data_bo) { @@ -2320,7 +2331,7 @@ 
gen7_jpeg_wa_avc_img_state(VADriverContextP ctx, BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); OUT_BCS_BATCH(batch, - width_in_mbs * height_in_mbs); + (width_in_mbs * height_in_mbs - 1)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); @@ -2614,9 +2625,10 @@ gen7_mfd_decode_picture(VADriverContextP ctx, gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); break; @@ -2708,9 +2720,10 @@ gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264StereoHigh: gen7_mfd_avc_context_init(ctx, gen7_mfd_context); break; default: diff --git a/src/gen7_mfd.h b/src/gen7_mfd.h index e3111ab..af8e960 100644 --- a/src/gen7_mfd.h +++ b/src/gen7_mfd.h @@ -77,6 +77,7 @@ struct gen7_mfd_context VAIQMatrixBufferH264 h264; /* flat scaling lists (default) */ } iq_matrix; + GenFrameStoreContext fs_ctx; GenFrameStore reference_surface[MAX_GEN_REFERENCE_FRAMES]; GenBuffer post_deblocking_output; GenBuffer pre_deblocking_output; @@ -85,6 +86,7 @@ struct gen7_mfd_context GenBuffer bsd_mpc_row_store_scratch_buffer; GenBuffer mpr_row_store_scratch_buffer; GenBuffer bitplane_read_buffer; + GenBuffer segmentation_buffer; VASurfaceID jpeg_wa_surface_id; struct object_surface *jpeg_wa_surface_object; diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 88eb484..dc15445 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -45,9 +45,6 @@ #endif #define VME_MSG_LENGTH 32 -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define 
SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN7 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) @@ -126,7 +123,7 @@ static struct i965_kernel gen7_vme_kernels[] = { }; static const uint32_t gen7_vme_mpeg2_inter_frame[][4] = { -#include "shaders/vme/mpeg2_inter_frame.g7b" +#include "shaders/vme/mpeg2_inter_ivb.g7b" }; static const uint32_t gen7_vme_mpeg2_batchbuffer[][4] = { @@ -250,7 +247,6 @@ gen7_vme_surface_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; - struct i965_driver_data *i965 = i965_driver_data(ctx); /*Setup surfaces state*/ /* current picture for encoding */ @@ -261,43 +257,14 @@ gen7_vme_surface_setup(VADriverContextP ctx, if (!is_intra) { VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; int slice_type; - struct object_surface *slice_obj_surface; - int ref_surface_id; slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI); - if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { - slice_obj_surface = NULL; - ref_surface_id = slice_param->RefPicList0[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - /* reference 0 */ - if (obj_surface && obj_surface->bo) - gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); - } - if (slice_type == SLICE_TYPE_B) { - /* reference 1 */ - slice_obj_surface = NULL; - ref_surface_id = 
slice_param->RefPicList1[0].picture_id; - if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) { - slice_obj_surface = SURFACE(ref_surface_id); - } - if (slice_obj_surface && slice_obj_surface->bo) { - obj_surface = slice_obj_surface; - } else { - obj_surface = encode_state->reference_objects[0]; - } - - obj_surface = encode_state->reference_objects[1]; - if (obj_surface && obj_surface->bo) - gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); - } + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen7_vme_source_surface_state); + + if (slice_type == SLICE_TYPE_B) + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen7_vme_source_surface_state); } /* VME output */ @@ -359,100 +326,39 @@ static VAStatus gen7_vme_constant_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; - // unsigned char *constant_buffer; + unsigned char *constant_buffer; unsigned int *vme_state_message; - int mv_num = 32; - if (vme_context->h264_level >= 30) { - mv_num = 16; - if (vme_context->h264_level >= 31) - mv_num = 8; - } + int mv_num; + + vme_state_message = (unsigned int *)vme_context->vme_state_message; + mv_num = 32; + + if (encoder_context->codec == CODEC_H264) { + if (vme_context->h264_level >= 30) { + mv_num = 16; + + if (vme_context->h264_level >= 31) + mv_num = 8; + } + } else if (encoder_context->codec == CODEC_MPEG2) { + mv_num = 2; + } + + + vme_state_message[31] = mv_num; dri_bo_map(vme_context->gpe_context.curbe.bo, 1); assert(vme_context->gpe_context.curbe.bo->virtual); - // constant_buffer = vme_context->curbe.bo->virtual; - vme_state_message = (unsigned int *)vme_context->gpe_context.curbe.bo->virtual; - vme_state_message[31] = mv_num; - - /*TODO copy buffer into CURB*/ + constant_buffer = vme_context->gpe_context.curbe.bo->virtual; + /* Pass the required constant info into the constant buffer */ + 
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128); + dri_bo_unmap( vme_context->gpe_context.curbe.bo); return VA_STATUS_SUCCESS; } -static const unsigned int intra_mb_mode_cost_table[] = { - 0x31110001, // for qp0 - 0x09110001, // for qp1 - 0x15030001, // for qp2 - 0x0b030001, // for qp3 - 0x0d030011, // for qp4 - 0x17210011, // for qp5 - 0x41210011, // for qp6 - 0x19210011, // for qp7 - 0x25050003, // for qp8 - 0x1b130003, // for qp9 - 0x1d130003, // for qp10 - 0x27070021, // for qp11 - 0x51310021, // for qp12 - 0x29090021, // for qp13 - 0x35150005, // for qp14 - 0x2b0b0013, // for qp15 - 0x2d0d0013, // for qp16 - 0x37170007, // for qp17 - 0x61410031, // for qp18 - 0x39190009, // for qp19 - 0x45250015, // for qp20 - 0x3b1b000b, // for qp21 - 0x3d1d000d, // for qp22 - 0x47270017, // for qp23 - 0x71510041, // for qp24 ! center for qp=0..30 - 0x49290019, // for qp25 - 0x55350025, // for qp26 - 0x4b2b001b, // for qp27 - 0x4d2d001d, // for qp28 - 0x57370027, // for qp29 - 0x81610051, // for qp30 - 0x57270017, // for qp31 - 0x81510041, // for qp32 ! 
center for qp=31..51 - 0x59290019, // for qp33 - 0x65350025, // for qp34 - 0x5b2b001b, // for qp35 - 0x5d2d001d, // for qp36 - 0x67370027, // for qp37 - 0x91610051, // for qp38 - 0x69390029, // for qp39 - 0x75450035, // for qp40 - 0x6b3b002b, // for qp41 - 0x6d3d002d, // for qp42 - 0x77470037, // for qp43 - 0xa1710061, // for qp44 - 0x79490039, // for qp45 - 0x85550045, // for qp46 - 0x7b4b003b, // for qp47 - 0x7d4d003d, // for qp48 - 0x87570047, // for qp49 - 0xb1810071, // for qp50 - 0x89590049 // for qp51 -}; - -static void gen7_vme_state_setup_fixup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - unsigned int *vme_state_message) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - - if (slice_param->slice_type != SLICE_TYPE_I && - slice_param->slice_type != SLICE_TYPE_SI) - return; - if (encoder_context->rate_control_mode == VA_RC_CQP) - vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta]; - else - vme_state_message[16] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY]; -} static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, struct encode_state *encode_state, @@ -461,48 +367,50 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, { struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned int *vme_state_message; - unsigned int *mb_cost_table; + unsigned int *mb_cost_table; int i; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY); - mb_cost_table = 
(unsigned int *)vme_context->vme_state_message; + mb_cost_table = (unsigned int *)vme_context->vme_state_message; //building VME state message dri_bo_map(vme_context->vme_state.bo, 1); assert(vme_context->vme_state.bo->virtual); vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual; - if ((slice_param->slice_type == SLICE_TYPE_P) || - (slice_param->slice_type == SLICE_TYPE_SP)) { - vme_state_message[0] = 0x01010101; - vme_state_message[1] = 0x10010101; - vme_state_message[2] = 0x0F0F0F0F; - vme_state_message[3] = 0x100F0F0F; - vme_state_message[4] = 0x01010101; - vme_state_message[5] = 0x10010101; - vme_state_message[6] = 0x0F0F0F0F; - vme_state_message[7] = 0x100F0F0F; - vme_state_message[8] = 0x01010101; - vme_state_message[9] = 0x10010101; - vme_state_message[10] = 0x0F0F0F0F; - vme_state_message[11] = 0x000F0F0F; - vme_state_message[12] = 0x00; - vme_state_message[13] = 0x00; - } else { - vme_state_message[0] = 0x10010101; - vme_state_message[1] = 0x100F0F0F; - vme_state_message[2] = 0x10010101; - vme_state_message[3] = 0x000F0F0F; - vme_state_message[4] = 0; - vme_state_message[5] = 0; - vme_state_message[6] = 0; - vme_state_message[7] = 0; - vme_state_message[8] = 0; - vme_state_message[9] = 0; - vme_state_message[10] = 0; - vme_state_message[11] = 0; - vme_state_message[12] = 0; - vme_state_message[13] = 0; - } + if (((slice_param->slice_type == SLICE_TYPE_P) || + (slice_param->slice_type == SLICE_TYPE_SP) && + !is_low_quality)) { + vme_state_message[0] = 0x01010101; + vme_state_message[1] = 0x10010101; + vme_state_message[2] = 0x0F0F0F0F; + vme_state_message[3] = 0x100F0F0F; + vme_state_message[4] = 0x01010101; + vme_state_message[5] = 0x10010101; + vme_state_message[6] = 0x0F0F0F0F; + vme_state_message[7] = 0x100F0F0F; + vme_state_message[8] = 0x01010101; + vme_state_message[9] = 0x10010101; + vme_state_message[10] = 0x0F0F0F0F; + vme_state_message[11] = 0x000F0F0F; + vme_state_message[12] = 0x00; + vme_state_message[13] = 0x00; + } else { 
+ vme_state_message[0] = 0x10010101; + vme_state_message[1] = 0x100F0F0F; + vme_state_message[2] = 0x10010101; + vme_state_message[3] = 0x000F0F0F; + vme_state_message[4] = 0; + vme_state_message[5] = 0; + vme_state_message[6] = 0; + vme_state_message[7] = 0; + vme_state_message[8] = 0; + vme_state_message[9] = 0; + vme_state_message[10] = 0; + vme_state_message[11] = 0; + vme_state_message[12] = 0; + vme_state_message[13] = 0; + } vme_state_message[14] = (mb_cost_table[2] & 0xFFFF); vme_state_message[15] = 0; @@ -519,14 +427,17 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, return VA_STATUS_SUCCESS; } -static VAStatus gen7_vme_vme_state_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) +static VAStatus gen7_vme_mpeg2_state_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned int *vme_state_message; int i; + unsigned int *mb_cost_table; + + mb_cost_table = (unsigned int *)vme_context->vme_state_message; //building VME state message dri_bo_map(vme_context->vme_state.bo, 1); @@ -548,20 +459,18 @@ static VAStatus gen7_vme_vme_state_setup(VADriverContextP ctx, vme_state_message[12] = 0x00; vme_state_message[13] = 0x00; - vme_state_message[14] = 0x4a4a; - vme_state_message[15] = 0x0; - vme_state_message[16] = 0x4a4a4a4a; - vme_state_message[17] = 0x4a4a4a4a; - vme_state_message[18] = 0x21110100; - vme_state_message[19] = 0x61514131; + vme_state_message[14] = (mb_cost_table[2] & 0xFFFF); + vme_state_message[15] = 0; + vme_state_message[16] = mb_cost_table[0]; + vme_state_message[17] = 0; + vme_state_message[18] = mb_cost_table[3]; + vme_state_message[19] = mb_cost_table[4]; for(i = 20; i < 32; i++) { vme_state_message[i] = 0; } //vme_state_message[16] = 0x42424242; //cost function LUT set 0 for Intra - 
gen7_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message); - dri_bo_unmap( vme_context->vme_state.bo); return VA_STATUS_SUCCESS; } @@ -637,7 +546,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); - *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); i += 1; } @@ -691,47 +600,52 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx, int s; bool allow_hwscore = true; int kernel_shader; - - for (s = 0; s < encode_state->num_slice_params_ext; s++) { - pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; - if ((pSliceParameter->macroblock_address % width_in_mbs)) { - allow_hwscore = false; - break; - } + unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY); + + if (is_low_quality) + allow_hwscore = false; + else { + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + if ((pSliceParameter->macroblock_address % width_in_mbs)) { + allow_hwscore = false; + break; + } + } } if ((pSliceParameter->slice_type == SLICE_TYPE_I) || (pSliceParameter->slice_type == SLICE_TYPE_I)) { kernel_shader = AVC_VME_INTRA_SHADER; } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || - (pSliceParameter->slice_type == SLICE_TYPE_SP)) { + (pSliceParameter->slice_type == SLICE_TYPE_SP)) { kernel_shader = AVC_VME_INTER_SHADER; } else { kernel_shader = AVC_VME_BINTER_SHADER; if (!allow_hwscore) - kernel_shader = AVC_VME_INTER_SHADER; + kernel_shader = AVC_VME_INTER_SHADER; } if (allow_hwscore) gen7_vme_walker_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - 
encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); else gen7_vme_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - kernel_shader, - pPicParameter->pic_fields.bits.transform_8x8_mode_flag, - encoder_context); + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, @@ -752,7 +666,7 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx, struct gen6_vme_context *vme_context = encoder_context->vme_context; if (!vme_context->h264_level || - (vme_context->h264_level != pSequenceParameter->level_idc)) { + (vme_context->h264_level != pSequenceParameter->level_idc)) { vme_context->h264_level = pSequenceParameter->level_idc; } @@ -803,10 +717,10 @@ gen7_vme_pipeline(VADriverContextP ctx, static void gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int index, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int index, + int is_intra, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ -837,9 +751,9 @@ gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx, static void gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int index, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); @@ 
-864,9 +778,9 @@ gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx, static VAStatus gen7_vme_mpeg2_surface_setup(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) { struct object_surface *obj_surface; @@ -897,14 +811,13 @@ gen7_vme_mpeg2_surface_setup(VADriverContextP ctx, static void gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_width, int mb_height, - int kernel, - int transform_8x8_mode_flag, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; - int number_mb_cmds; int mb_x = 0, mb_y = 0; int i, s, j; unsigned int *command_ptr; @@ -918,33 +831,43 @@ gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { int slice_mb_begin = slice_param->macroblock_address; int slice_mb_number = slice_param->num_macroblocks; + unsigned int mb_intra_ub; for (i = 0; i < slice_mb_number;) { - int mb_count = i + slice_mb_begin; + int mb_count = i + slice_mb_begin; mb_x = mb_count % mb_width; mb_y = mb_count / mb_width; + mb_intra_ub = 0; - if( i == 0) { - number_mb_cmds = mb_width; - } else if ((i + 128) <= slice_mb_number) { - number_mb_cmds = 128; - } else { - number_mb_cmds = slice_mb_number - i; + if (mb_x != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; } + if (mb_y != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + + if (mb_x != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (mb_x != (mb_width -1)) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + } + + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); *command_ptr++ = kernel; 
*command_ptr++ = 0; *command_ptr++ = 0; *command_ptr++ = 0; *command_ptr++ = 0; - + /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); - *command_ptr++ = ( (number_mb_cmds << 16) | transform_8x8_mode_flag | ((i == 0) << 1)); + *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); - i += number_mb_cmds; + i += 1; } slice_param++; @@ -959,9 +882,9 @@ gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, static void gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - int is_intra, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct intel_batchbuffer *batch = encoder_context->base.batch; @@ -969,17 +892,39 @@ gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - gen7_vme_mpeg2_fill_vme_batchbuffer(ctx, - encode_state, - width_in_mbs, height_in_mbs, - MPEG2_VME_INTER_SHADER, - 0, - encoder_context); + bool allow_hwscore = true; + int s; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + int j; + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer; + + for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { + if (slice_param->macroblock_address % width_in_mbs) { + allow_hwscore = false; + break; + } + } + } + + if (allow_hwscore) + gen7_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + MPEG2_VME_INTER_SHADER, + encoder_context); + else + gen7_vme_mpeg2_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + MPEG2_VME_INTER_SHADER, + 0, + encoder_context); intel_batchbuffer_start_atomic(batch, 0x1000); 
gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); OUT_RELOC(batch, vme_context->vme_batchbuffer.bo, I915_GEM_DOMAIN_COMMAND, 0, @@ -991,16 +936,25 @@ gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, static VAStatus gen7_vme_mpeg2_prepare(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { VAStatus vaStatus = VA_STATUS_SUCCESS; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + if ((!vme_context->mpeg2_level) || + (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { + vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK; + } - /*Setup all the memory object*/ + /*Setup all the memory object*/ + + intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context); gen7_vme_mpeg2_surface_setup(ctx, encode_state, 0, encoder_context); gen7_vme_interface_setup(ctx, encode_state, encoder_context); - gen7_vme_vme_state_setup(ctx, encode_state, 0, encoder_context); gen7_vme_constant_setup(ctx, encode_state, encoder_context); + gen7_vme_mpeg2_state_setup(ctx, encode_state, 0, encoder_context); /*Programing media pipeline*/ gen7_vme_mpeg2_pipeline_programing(ctx, encode_state, 0, encoder_context); @@ -1010,34 +964,34 @@ gen7_vme_mpeg2_prepare(VADriverContextP ctx, static VAStatus gen7_vme_mpeg2_pipeline(VADriverContextP ctx, - VAProfile profile, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context 
*encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_vme_context *vme_context = encoder_context->vme_context; VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; VAEncSequenceParameterBufferMPEG2 *seq_param = - (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; /*No need of to exec VME for Intra slice */ if (slice_param->is_intra_slice) { - if(!vme_context->vme_output.bo) { - int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; - int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; - - vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs; - vme_context->vme_output.pitch = 16; /* in bytes, always 16 */ - vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES; - vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, - "MPEG2 VME output buffer", - vme_context->vme_output.num_blocks - * vme_context->vme_output.size_block, - 0x1000); - } - - return VA_STATUS_SUCCESS; + if(!vme_context->vme_output.bo) { + int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + + vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs; + vme_context->vme_output.pitch = 16; /* in bytes, always 16 */ + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES; + vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, + "MPEG2 VME output buffer", + vme_context->vme_output.num_blocks + * vme_context->vme_output.size_block, + 0x1000); + } + + return VA_STATUS_SUCCESS; } gen7_vme_media_init(ctx, encoder_context); @@ -1078,7 +1032,7 @@ Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e struct i965_kernel *vme_kernel_list = NULL; vme_context->gpe_context.surface_state_binding_table.length = - (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * 
MAX_MEDIA_SURFACES_GEN6; + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6; vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); @@ -1092,21 +1046,18 @@ Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e gen7_vme_scoreboard_init(ctx, vme_context); - if(encoder_context->profile == VAProfileH264Baseline || - encoder_context->profile == VAProfileH264Main || - encoder_context->profile == VAProfileH264High ){ + if (encoder_context->codec == CODEC_H264) { vme_kernel_list = gen7_vme_kernels; vme_context->video_coding_type = VIDEO_CODING_AVC; vme_context->vme_kernel_sum = AVC_VME_KERNEL_SUM; encoder_context->vme_pipeline = gen7_vme_pipeline; - } else if (encoder_context->profile == VAProfileMPEG2Simple || - encoder_context->profile == VAProfileMPEG2Main ){ + } else if (encoder_context->codec == CODEC_MPEG2) { vme_kernel_list = gen7_vme_mpeg2_kernels; vme_context->video_coding_type = VIDEO_CODING_MPEG2; vme_context->vme_kernel_sum = MPEG2_VME_KERNEL_SUM; encoder_context->vme_pipeline = gen7_vme_mpeg2_pipeline; } else { - /* Unsupported encoding profile */ + /* Unsupported codec */ assert(0); } diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c new file mode 100644 index 0000000..2d76816 --- /dev/null +++ b/src/gen8_mfc.c @@ -0,0 +1,2478 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * 
next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + * Xiang Haihao <haihao.xiang@intel.com> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <assert.h> + +#include "intel_batchbuffer.h" +#include "i965_defines.h" +#include "i965_structs.h" +#include "i965_drv_video.h" +#include "i965_encoder.h" +#include "i965_encoder_utils.h" +#include "gen6_mfc.h" +#include "gen6_vme.h" +#include "intel_media.h" + +#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) + +#define MFC_SOFTWARE_HASWELL 1 + +#define B0_STEP_REV 2 +#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) + +static const uint32_t gen8_mfc_batchbuffer_avc_intra[][4] = { +#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b" +}; + +static const uint32_t gen8_mfc_batchbuffer_avc_inter[][4] = { +#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b" +}; + +static struct i965_kernel gen8_mfc_kernels[] = { + { + "MFC AVC INTRA BATCHBUFFER ", + MFC_BATCHBUFFER_AVC_INTRA, + gen8_mfc_batchbuffer_avc_intra, + sizeof(gen8_mfc_batchbuffer_avc_intra), + NULL + }, + + { + "MFC AVC INTER BATCHBUFFER ", + MFC_BATCHBUFFER_AVC_INTER, + gen8_mfc_batchbuffer_avc_inter, + 
sizeof(gen8_mfc_batchbuffer_avc_inter), + NULL + }, +}; + +#define INTER_MODE_MASK 0x03 +#define INTER_8X8 0x03 +#define INTER_16X8 0x01 +#define INTER_8X16 0x02 +#define SUBMB_SHAPE_MASK 0x00FF00 + +#define INTER_MV8 (4 << 20) +#define INTER_MV32 (6 << 20) + + +static void +gen8_mfc_pipe_mode_select(VADriverContextP ctx, + int standard_select, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + assert(standard_select == MFX_FORMAT_MPEG2 || + standard_select == MFX_FORMAT_AVC); + + BEGIN_BCS_BATCH(batch, 5); + + OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); + OUT_BCS_BATCH(batch, + (MFX_LONG_MODE << 17) | /* Must be long format for encoder */ + (MFD_MODE_VLD << 15) | /* VLD mode */ + (0 << 10) | /* Stream-Out Enable */ + ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */ + ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */ + (0 << 5) | /* not in stitch mode */ + (1 << 4) | /* encoding mode */ + (standard_select << 0)); /* standard select: avc or mpeg2 */ + OUT_BCS_BATCH(batch, + (0 << 7) | /* expand NOA bus flag */ + (0 << 6) | /* disable slice-level clock gating */ + (0 << 5) | /* disable clock gating for NOA */ + (0 << 4) | /* terminate if AVC motion and POC table error occurs */ + (0 << 3) | /* terminate if AVC mbdata error occurs */ + (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ + (0 << 1) | + (0 << 0)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + BEGIN_BCS_BATCH(batch, 6); + + OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, 
0); + OUT_BCS_BATCH(batch, + ((mfc_context->surface_state.height - 1) << 18) | + ((mfc_context->surface_state.width - 1) << 4)); + OUT_BCS_BATCH(batch, + (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ + (0 << 22) | /* surface object control state, FIXME??? */ + ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */ + (0 << 2) | /* must be 0 for interleave U/V */ + (1 << 1) | /* must be tiled */ + (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* must be 0 for interleave U/V */ + (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */ + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + int vme_size; + + BEGIN_BCS_BATCH(batch, 26); + + OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); + /* the DW1-3 is for the MFX indirect bistream offset */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* the DW4-5 is the MFX upper bound */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks; + /* the DW6-10 is for MFX Indirect MV Object Base Address */ + OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size); + OUT_BCS_BATCH(batch, 0); + + /* the DW11-15 is for MFX IT-COFF. 
Not used on encoder */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/ + OUT_BCS_RELOC(batch, + mfc_context->mfc_indirect_pak_bse_object.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_RELOC(batch, + mfc_context->mfc_indirect_pak_bse_object.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + mfc_context->mfc_indirect_pak_bse_object.end_offset); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + + BEGIN_BCS_BATCH(batch, 16); + + OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); + /*DW1. 
MB setting of frame */ + OUT_BCS_BATCH(batch, + ((width_in_mbs * height_in_mbs - 1) & 0xFFFF)); + OUT_BCS_BATCH(batch, + ((height_in_mbs - 1) << 16) | + ((width_in_mbs - 1) << 0)); + /* DW3 QP setting */ + OUT_BCS_BATCH(batch, + (0 << 24) | /* Second Chroma QP Offset */ + (0 << 16) | /* Chroma QP Offset */ + (0 << 14) | /* Max-bit conformance Intra flag */ + (0 << 13) | /* Max Macroblock size conformance Inter flag */ + (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */ + (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */ + (0 << 8) | /* FIXME: Image Structure */ + (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* Mininum Frame size */ + (0 << 15) | /* Disable reading of Macroblock Status Buffer */ + (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */ + (0 << 13) | /* CABAC 0 word insertion test enable */ + (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */ + (1 << 10) | /* Chroma Format IDC, 4:2:0 */ + (0 << 8) | /* FIXME: MbMvFormatFlag */ + (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/ + (0 << 6) | /* Only valid for VLD decoding mode */ + (0 << 5) | /* Constrained Intra Predition Flag, from PPS */ + (0 << 4) | /* Direct 8x8 inference flag */ + (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/ + (1 << 2) | /* Frame MB only flag */ + (0 << 1) | /* MBAFF mode is in active */ + (0 << 0)); /* Field picture flag */ + /* DW5 Trellis quantization */ + OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ + OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ + (0xBB8 << 16) | /* InterMbMaxSz */ + (0xEE8) ); /* IntraMbMaxSz */ + OUT_BCS_BATCH(batch, 0); /* Reserved */ + /* DW8. 
QP delta */ + OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ + OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ + /* DW10. Bit setting for MB */ + OUT_BCS_BATCH(batch, 0x8C000000); + OUT_BCS_BATCH(batch, 0x00010000); + /* DW12. */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0x02010100); + /* DW14. For short format */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_qm_state(VADriverContextP ctx, + int qm_type, + unsigned int *qm, + int qm_length, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + unsigned int qm_buffer[16]; + + assert(qm_length <= 16); + assert(sizeof(*qm) == 4); + memcpy(qm_buffer, qm, qm_length * 4); + + BEGIN_BCS_BATCH(batch, 18); + OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2)); + OUT_BCS_BATCH(batch, qm_type << 0); + intel_batchbuffer_data(batch, qm_buffer, 16 * 4); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + unsigned int qm[16] = { + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010 + }; + + gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context); + gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context); + gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context); + gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context); +} + +static void +gen8_mfc_fqm_state(VADriverContextP ctx, + int fqm_type, + unsigned int *fqm, + int fqm_length, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + unsigned int fqm_buffer[32]; + + assert(fqm_length <= 32); + assert(sizeof(*fqm) == 4); + 
memcpy(fqm_buffer, fqm, fqm_length * 4); + + BEGIN_BCS_BATCH(batch, 34); + OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2)); + OUT_BCS_BATCH(batch, fqm_type << 0); + intel_batchbuffer_data(batch, fqm_buffer, 32 * 4); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + unsigned int qm[32] = { + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000 + }; + + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context); +} + +static void +gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context, + unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, + int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, + struct intel_batchbuffer *batch) +{ + if (batch == NULL) + batch = encoder_context->base.batch; + + if (data_bits_in_last_dw == 0) + data_bits_in_last_dw = 32; + + BEGIN_BCS_BATCH(batch, lenght_in_dws + 2); + + OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2)); + OUT_BCS_BATCH(batch, + (0 << 16) | /* always start at offset 0 */ + (data_bits_in_last_dw << 8) | + (skip_emul_byte_count << 4) | + (!!emulation_flag << 3) | + ((!!is_last_header) << 2) | + ((!!is_end_of_slice) << 1) | + (0 << 0)); /* FIXME: ??? 
*/ + intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4); + + ADVANCE_BCS_BATCH(batch); +} + + +static void gen8_mfc_init(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + dri_bo *bo; + int i; + int width_in_mbs = 0; + int height_in_mbs = 0; + int slice_batchbuffer_size; + + if (encoder_context->codec == CODEC_H264 || + encoder_context->codec == CODEC_H264_MVC) { + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + width_in_mbs = pSequenceParameter->picture_width_in_mbs; + height_in_mbs = pSequenceParameter->picture_height_in_mbs; + } else { + VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + + assert(encoder_context->codec == CODEC_MPEG2); + + width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16; + height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; + } + + slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + + (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext; + + /*Encode common setup for MFC*/ + dri_bo_unreference(mfc_context->post_deblocking_output.bo); + mfc_context->post_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->pre_deblocking_output.bo); + mfc_context->pre_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->uncompressed_picture_source.bo); + mfc_context->uncompressed_picture_source.bo = NULL; + + dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); + mfc_context->mfc_indirect_pak_bse_object.bo = NULL; + + for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){ + if ( mfc_context->direct_mv_buffers[i].bo != NULL); + dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo); + mfc_context->direct_mv_buffers[i].bo 
= NULL; + } + + for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){ + if (mfc_context->reference_surfaces[i].bo != NULL) + dri_bo_unreference(mfc_context->reference_surfaces[i].bo); + mfc_context->reference_surfaces[i].bo = NULL; + } + + dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * 64, + 64); + assert(bo); + mfc_context->intra_row_store_scratch_buffer.bo = bo; + + dri_bo_unreference(mfc_context->macroblock_status_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * height_in_mbs * 16, + 64); + assert(bo); + mfc_context->macroblock_status_buffer.bo = bo; + + dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */ + 64); + assert(bo); + mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo; + + dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */ + 0x1000); + assert(bo); + mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + + dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo); + mfc_context->mfc_batchbuffer_surface.bo = NULL; + + dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->aux_batchbuffer_surface.bo = NULL; + + if (mfc_context->aux_batchbuffer) + intel_batchbuffer_free(mfc_context->aux_batchbuffer); + + mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size); + mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; + dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->aux_batchbuffer_surface.pitch = 16; + mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16; + mfc_context->aux_batchbuffer_surface.size_block = 16; + + 
i965_gpe_context_init(ctx, &mfc_context->gpe_context); +} + +static void +gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + int i; + + BEGIN_BCS_BATCH(batch, 61); + + OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2)); + + /* the DW1-3 is for pre_deblocking */ + if (mfc_context->pre_deblocking_output.bo) + OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); /* pre output addr */ + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* the DW4-6 is for the post_deblocking */ + + if (mfc_context->post_deblocking_output.bo) + OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); /* post output addr */ + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW7-9 is for the uncompressed_picture */ + OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); /* uncompressed data */ + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW10-12 is for the mb status */ + OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); /* StreamOut data*/ + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW13-15 is for the intra_row_store_scratch */ + OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW16-18 is for the deblocking filter */ + OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo, + 
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 19-50 is for Reference pictures*/ + for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { + if ( mfc_context->reference_surfaces[i].bo != NULL) { + OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + } else { + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, 0); + + /* The DW 52-54 is for the MB status buffer */ + OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); /* Macroblock status buffer*/ + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 55-57 is the ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 58-60 is the second ILDB buffer */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_avc_directmode_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + int i; + + BEGIN_BCS_BATCH(batch, 71); + + OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); + + /* Reference frames and Current frames */ + /* the DW1-32 is for the direct MV for reference */ + for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) { + if ( mfc_context->direct_mv_buffers[i].bo != NULL) { + OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BCS_BATCH(batch, 0); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + } + + OUT_BCS_BATCH(batch, 0); + + /* the DW34-36 is the MV for the current reference */ + OUT_BCS_RELOC(batch, 
mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* POL list */ + for(i = 0; i < 32; i++) { + OUT_BCS_BATCH(batch, i/2); + } + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + + +static void +gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + BEGIN_BCS_BATCH(batch, 10); + + OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2)); + OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW7-9 is for Bitplane Read Buffer Base Address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + + +static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context); + mfc_context->set_surface_state(ctx, encoder_context); + mfc_context->ind_obj_base_addr_state(ctx, encoder_context); + gen8_mfc_pipe_buf_addr_state(ctx, encoder_context); + gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context); + mfc_context->avc_img_state(ctx, encode_state, encoder_context); + mfc_context->avc_qm_state(ctx, encoder_context); + mfc_context->avc_fqm_state(ctx, encoder_context); + gen8_mfc_avc_directmode_state(ctx, encoder_context); + 
intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context); +} + + +static VAStatus gen8_mfc_run(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + + intel_batchbuffer_flush(batch); //run the pipeline + + return VA_STATUS_SUCCESS; +} + + +static VAStatus +gen8_mfc_stop(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int *encoded_bits_size) +{ + VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VACodedBufferSegment *coded_buffer_segment; + + vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment); + assert(vaStatus == VA_STATUS_SUCCESS); + *encoded_bits_size = coded_buffer_segment->size * 8; + i965_UnmapBuffer(ctx, pPicParameter->coded_buf); + + return VA_STATUS_SUCCESS; +} + + +static void +gen8_mfc_avc_slice_state(VADriverContextP ctx, + VAEncPictureParameterBufferH264 *pic_param, + VAEncSliceParameterBufferH264 *slice_param, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int rate_control_enable, + int qp, + struct intel_batchbuffer *batch) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + int beginmb = slice_param->macroblock_address; + int endmb = beginmb + slice_param->num_macroblocks; + int beginx = beginmb % width_in_mbs; + int beginy = beginmb / width_in_mbs; + int nextx = endmb % width_in_mbs; + int nexty = endmb / width_in_mbs; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + int last_slice = (endmb == (width_in_mbs * height_in_mbs)); + int maxQpN, maxQpP; + unsigned char correct[6], 
grow, shrink; + int i; + int weighted_pred_idc = 0; + unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom; + unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom; + int num_ref_l0 = 0, num_ref_l1 = 0; + + if (batch == NULL) + batch = encoder_context->base.batch; + + if (slice_type == SLICE_TYPE_I) { + luma_log2_weight_denom = 0; + chroma_log2_weight_denom = 0; + } else if (slice_type == SLICE_TYPE_P) { + weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + } else if (slice_type == SLICE_TYPE_B) { + weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; + num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1; + + if (slice_param->num_ref_idx_active_override_flag) { + num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + } + + if (weighted_pred_idc == 2) { + /* 8.4.3 - Derivation process for prediction weights (8-279) */ + luma_log2_weight_denom = 5; + chroma_log2_weight_denom = 5; + } + } + + maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier; + maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier; + + for (i = 0; i < 6; i++) + correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i]; + + grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + + (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4); + shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + + (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4); + + BEGIN_BCS_BATCH(batch, 11);; + + OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) ); + OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B 
Slice*/ + + OUT_BCS_BATCH(batch, + (num_ref_l0 << 16) | + (num_ref_l1 << 24) | + (chroma_log2_weight_denom << 8) | + (luma_log2_weight_denom << 0)); + + OUT_BCS_BATCH(batch, + (weighted_pred_idc << 30) | + (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/ + (slice_param->disable_deblocking_filter_idc << 27) | + (slice_param->cabac_init_idc << 24) | + (qp<<16) | /*Slice Quantization Parameter*/ + ((slice_param->slice_beta_offset_div2 & 0xf) << 8) | + ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0)); + OUT_BCS_BATCH(batch, + (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/ + (beginx << 16) | + slice_param->macroblock_address ); + OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/ + OUT_BCS_BATCH(batch, + (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/ + (1 << 30) | /*ResetRateControlCounter*/ + (0 << 28) | /*RC Triggle Mode = Always Rate Control*/ + (4 << 24) | /*RC Stable Tolerance, middle level*/ + (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/ + (0 << 22) | /*QP mode, don't modfiy CBP*/ + (0 << 21) | /*MB Type Direct Conversion Enabled*/ + (0 << 20) | /*MB Type Skip Conversion Enabled*/ + (last_slice << 19) | /*IsLastSlice*/ + (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/ + (1 << 17) | /*HeaderPresentFlag*/ + (1 << 16) | /*SliceData PresentFlag*/ + (1 << 15) | /*TailPresentFlag*/ + (1 << 13) | /*RBSP NAL TYPE*/ + (0 << 12) ); /*CabacZeroWordInsertionEnable*/ + OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset); + OUT_BCS_BATCH(batch, + (maxQpN << 24) | /*Target QP - 24 is lowest QP*/ + (maxQpP << 16) | /*Target QP + 20 is highest QP*/ + (shrink << 8) | + (grow << 0)); + OUT_BCS_BATCH(batch, + (correct[5] << 20) | + (correct[4] << 16) | + (correct[3] << 12) | + (correct[2] << 8) | + (correct[1] << 4) | + (correct[0] << 0)); + OUT_BCS_BATCH(batch, 0); + + 
ADVANCE_BCS_BATCH(batch); +} + + +#ifdef MFC_SOFTWARE_HASWELL + +static int +gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, + int qp,unsigned int *msg, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size, unsigned char max_mb_size, + struct intel_batchbuffer *batch) +{ + int len_in_dwords = 12; + unsigned int intra_msg; +#define INTRA_MSG_FLAG (1 << 13) +#define INTRA_MBTYPE_MASK (0x1F0000) + if (batch == NULL) + batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, len_in_dwords); + + intra_msg = msg[0] & 0xC0FF; + intra_msg |= INTRA_MSG_FLAG; + intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8); + OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + (0 << 24) | /* PackedMvNum, Debug*/ + (0 << 20) | /* No motion vector */ + (1 << 19) | /* CbpDcY */ + (1 << 18) | /* CbpDcU */ + (1 << 17) | /* CbpDcV */ + intra_msg); + + OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/ + OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */ + OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */ + + /*Stuff for Intra MB*/ + OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/ + OUT_BCS_BATCH(batch, msg[2]); + OUT_BCS_BATCH(batch, msg[3]&0xFF); + + /*MaxSizeInWord and TargetSzieInWord*/ + OUT_BCS_BATCH(batch, (max_mb_size << 24) | + (target_mb_size << 16) ); + + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); + + return len_in_dwords; +} + +static int +gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, + unsigned int *msg, unsigned int offset, + struct intel_encoder_context *encoder_context, + unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, + struct intel_batchbuffer *batch) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + int len_in_dwords = 12; + unsigned int 
inter_msg = 0; + if (batch == NULL) + batch = encoder_context->base.batch; + { +#define MSG_MV_OFFSET 4 + unsigned int *mv_ptr; + mv_ptr = msg + MSG_MV_OFFSET; + /* MV of VME output is based on 16 sub-blocks. So it is necessary + * to convert them to be compatible with the format of AVC_PAK + * command. + */ + if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) { + /* MV[0] and MV[2] are replicated */ + mv_ptr[4] = mv_ptr[0]; + mv_ptr[5] = mv_ptr[1]; + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[6] = mv_ptr[8]; + mv_ptr[7] = mv_ptr[9]; + } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) { + /* MV[0] and MV[1] are replicated */ + mv_ptr[2] = mv_ptr[0]; + mv_ptr[3] = mv_ptr[1]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; + } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && + !(msg[1] & SUBMB_SHAPE_MASK)) { + /* Don't touch MV[0] or MV[1] */ + mv_ptr[2] = mv_ptr[8]; + mv_ptr[3] = mv_ptr[9]; + mv_ptr[4] = mv_ptr[16]; + mv_ptr[5] = mv_ptr[17]; + mv_ptr[6] = mv_ptr[24]; + mv_ptr[7] = mv_ptr[25]; + } + } + + BEGIN_BCS_BATCH(batch, len_in_dwords); + + OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); + + inter_msg = 32; + /* MV quantity */ + if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { + if (msg[1] & SUBMB_SHAPE_MASK) + inter_msg = 128; + } + OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/ + OUT_BCS_BATCH(batch, offset); + inter_msg = msg[0] & (0x1F00FFFF); + inter_msg |= INTER_MV8; + inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17)); + if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && + (msg[1] & SUBMB_SHAPE_MASK)) { + inter_msg |= INTER_MV32; + } + + OUT_BCS_BATCH(batch, inter_msg); + + OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/ + OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */ +#if 0 + if ( slice_type == SLICE_TYPE_B) { + OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */ + } else { + OUT_BCS_BATCH(batch, (end_mb << 26) 
| qp); /* Last MB */ + } +#else + OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */ +#endif + + inter_msg = msg[1] >> 8; + /*Stuff for Inter MB*/ + OUT_BCS_BATCH(batch, inter_msg); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]); + OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]); + + /*MaxSizeInWord and TargetSzieInWord*/ + OUT_BCS_BATCH(batch, (max_mb_size << 24) | + (target_mb_size << 16) ); + + OUT_BCS_BATCH(batch, 0x0); + + ADVANCE_BCS_BATCH(batch); + + return len_in_dwords; +} + +#define AVC_INTRA_RDO_OFFSET 4 +#define AVC_INTER_RDO_OFFSET 10 +#define AVC_INTER_MSG_OFFSET 8 +#define AVC_INTER_MV_OFFSET 48 +#define AVC_RDO_MASK 0xFFFF + +static void +gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + struct intel_batchbuffer *slice_batch) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; + unsigned int *msg = NULL, offset = 0; + unsigned char *msg_ptr = NULL; + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); + int i,x,y; + int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; + unsigned int rate_control_mode = encoder_context->rate_control_mode; + unsigned int tail_data[] = { 0x0, 0x0 }; + int slice_type = 
intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + int is_intra = slice_type == SLICE_TYPE_I; + int qp_slice; + + qp_slice = qp; + if (rate_control_mode == VA_RC_CBR) { + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; + if (encode_state->slice_header_index[slice_index] == 0) { + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } + } + + /* only support for 8-bit pixel bit-depth */ + assert(pSequenceParameter->bit_depth_luma_minus8 == 0); + assert(pSequenceParameter->bit_depth_chroma_minus8 == 0); + assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); + assert(qp >= 0 && qp < 52); + + gen8_mfc_avc_slice_state(ctx, + pPicParameter, + pSliceParameter, + encode_state, encoder_context, + (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch); + + if ( slice_index == 0) + intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); + + dri_bo_map(vme_context->vme_output.bo , 1); + msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; + + if (is_intra) { + msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block); + } else { + msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block); + } + + for (i = pSliceParameter->macroblock_address; + i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) { + int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) ); + x = i % width_in_mbs; + y = i / width_in_mbs; + msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block); + + if (is_intra) { + assert(msg); + gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + } else { + int inter_rdo, intra_rdo; + inter_rdo = 
msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; + intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; + offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET; + if (intra_rdo < inter_rdo) { + gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + } else { + msg += AVC_INTER_MSG_OFFSET; + gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); + } + } + } + + dri_bo_unmap(vme_context->vme_output.bo); + + if ( last_slice ) { + mfc_context->insert_object(ctx, encoder_context, + tail_data, 2, 8, + 2, 1, 1, 0, slice_batch); + } else { + mfc_context->insert_object(ctx, encoder_context, + tail_data, 1, 8, + 1, 1, 1, 0, slice_batch); + } +} + +static dri_bo * +gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch; + dri_bo *batch_bo; + int i; + + batch = mfc_context->aux_batchbuffer; + batch_bo = batch->buffer; + for (i = 0; i < encode_state->num_slice_params_ext; i++) { + gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch); + } + + intel_batchbuffer_align(batch, 8); + + BEGIN_BCS_BATCH(batch, 2); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(batch); + + dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; + + return batch_bo; +} + +#else + +static void +gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) + +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + 
assert(vme_context->vme_output.bo); + mfc_context->buffer_suface_setup(ctx, + &mfc_context->gpe_context, + &vme_context->vme_output, + BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT), + SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT)); + assert(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->buffer_suface_setup(ctx, + &mfc_context->gpe_context, + &mfc_context->aux_batchbuffer_surface, + BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER), + SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER)); +} + +static void +gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1; + mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */ + mfc_context->mfc_batchbuffer_surface.pitch = 16; + mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, + "MFC batchbuffer", + mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block, + 0x1000); + mfc_context->buffer_suface_setup(ctx, + &mfc_context->gpe_context, + &mfc_context->mfc_batchbuffer_surface, + BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER), + SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER)); +} + +static void +gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context); + 
gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context); +} + +static void +gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_interface_descriptor_data *desc; + int i; + dri_bo *bo; + + bo = mfc_context->gpe_context.idrt.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + desc = bo->virtual; + + for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) { + struct i965_kernel *kernel; + + kernel = &mfc_context->gpe_context.kernels[i]; + assert(sizeof(*desc) == 32); + + /*Setup the descritor table*/ + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6); + desc->desc2.sampler_count = 0; + desc->desc2.sampler_state_pointer = 0; + desc->desc3.binding_table_entry_count = 2; + desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); + desc->desc4.constant_urb_entry_read_offset = 0; + desc->desc4.constant_urb_entry_read_length = 4; + + /*kernel start*/ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0), + kernel->bo); + desc++; + } + + dri_bo_unmap(bo); +} + +static void +gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + (void)mfc_context; +} + +static void +gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, + int index, + int head_offset, + int batchbuffer_offset, + int head_size, + int tail_size, + int number_mb_cmds, + int first_object, + int last_object, + int last_slice, + int mb_x, + int mb_y, + int width_in_mbs, + int qp) +{ + BEGIN_BATCH(batch, 12); + + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2)); + OUT_BATCH(batch, index); + 
OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + + /*inline data */ + OUT_BATCH(batch, head_offset); + OUT_BATCH(batch, batchbuffer_offset); + OUT_BATCH(batch, + head_size << 16 | + tail_size); + OUT_BATCH(batch, + number_mb_cmds << 16 | + first_object << 2 | + last_object << 1 | + last_slice); + OUT_BATCH(batch, + mb_y << 8 | + mb_x); + OUT_BATCH(batch, + qp << 16 | + width_in_mbs); + + ADVANCE_BATCH(batch); +} + +static void +gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + VAEncSliceParameterBufferH264 *slice_param, + int head_offset, + unsigned short head_size, + unsigned short tail_size, + int batchbuffer_offset, + int qp, + int last_slice) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int total_mbs = slice_param->num_macroblocks; + int number_mb_cmds = 128; + int starting_mb = 0; + int last_object = 0; + int first_object = 1; + int i; + int mb_x, mb_y; + int index = (slice_param->slice_type == SLICE_TYPE_I) ? 
MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER; + + for (i = 0; i < total_mbs / number_mb_cmds; i++) { + last_object = (total_mbs - starting_mb) == number_mb_cmds; + mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; + mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; + assert(mb_x <= 255 && mb_y <= 255); + + starting_mb += number_mb_cmds; + + gen8_mfc_batchbuffer_emit_object_command(batch, + index, + head_offset, + batchbuffer_offset, + head_size, + tail_size, + number_mb_cmds, + first_object, + last_object, + last_slice, + mb_x, + mb_y, + width_in_mbs, + qp); + + if (first_object) { + head_offset += head_size; + batchbuffer_offset += head_size; + } + + if (last_object) { + head_offset += tail_size; + batchbuffer_offset += tail_size; + } + + batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD; + + first_object = 0; + } + + if (!last_object) { + last_object = 1; + number_mb_cmds = total_mbs % number_mb_cmds; + mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; + mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; + assert(mb_x <= 255 && mb_y <= 255); + starting_mb += number_mb_cmds; + + gen8_mfc_batchbuffer_emit_object_command(batch, + index, + head_offset, + batchbuffer_offset, + head_size, + tail_size, + number_mb_cmds, + first_object, + last_object, + last_slice, + mb_x, + mb_y, + width_in_mbs, + qp); + } +} + +/* + * return size in Owords (16bytes) + */ +static int +gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + int batchbuffer_offset) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 *pPicParameter = 
(VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); + int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; + unsigned int rate_control_mode = encoder_context->rate_control_mode; + unsigned int tail_data[] = { 0x0, 0x0 }; + long head_offset; + int old_used = intel_batchbuffer_used_size(slice_batch), used; + unsigned short head_size, tail_size; + int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); + int qp_slice; + + qp_slice = qp; + if (rate_control_mode == VA_RC_CBR) { + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; + if (encode_state->slice_header_index[slice_index] == 0) { + pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; + qp_slice = qp; + } + } + + /* only support for 8-bit pixel bit-depth */ + assert(pSequenceParameter->bit_depth_luma_minus8 == 0); + assert(pSequenceParameter->bit_depth_chroma_minus8 == 0); + assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); + assert(qp >= 0 && qp < 52); + + head_offset = old_used / 16; + gen8_mfc_avc_slice_state(ctx, + pPicParameter, + pSliceParameter, + encode_state, + encoder_context, + (rate_control_mode == VA_RC_CBR), + qp_slice, + slice_batch); + + if (slice_index == 0) + intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + + intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); + + intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ + used = intel_batchbuffer_used_size(slice_batch); + head_size = (used - 
old_used) / 16; + old_used = used; + + /* tail */ + if (last_slice) { + mfc_context->insert_object(ctx, + encoder_context, + tail_data, + 2, + 8, + 2, + 1, + 1, + 0, + slice_batch); + } else { + mfc_context->insert_object(ctx, + encoder_context, + tail_data, + 1, + 8, + 1, + 1, + 1, + 0, + slice_batch); + } + + intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ + used = intel_batchbuffer_used_size(slice_batch); + tail_size = (used - old_used) / 16; + + + gen8_mfc_avc_batchbuffer_slice_command(ctx, + encoder_context, + pSliceParameter, + head_offset, + head_size, + tail_size, + batchbuffer_offset, + qp, + last_slice); + + return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD; +} + +static void +gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct intel_batchbuffer *batch = encoder_context->base.batch; + int i, size, offset = 0; + intel_batchbuffer_start_atomic(batch, 0x4000); + gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); + + for ( i = 0; i < encode_state->num_slice_params_ext; i++) { + size = gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset); + offset += size; + } + + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); +} + +static void +gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context); + gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context); + gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context); + gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context); +} + +static dri_bo * +gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + 
struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context); + dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo); + + return mfc_context->mfc_batchbuffer_surface.bo; +} + +#endif + +static void +gen8_mfc_avc_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + dri_bo *slice_batch_bo; + + if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) { + fprintf(stderr, "Current VA driver don't support interlace mode!\n"); + assert(0); + return; + } + +#ifdef MFC_SOFTWARE_HASWELL + slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); +#else + slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); +#endif + + // begin programing + intel_batchbuffer_start_atomic_bcs(batch, 0x4000); + intel_batchbuffer_emit_mi_flush(batch); + + // picture level programing + gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context); + + BEGIN_BCS_BATCH(batch, 3); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_BCS_RELOC(batch, + slice_batch_bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); + + // end programing + intel_batchbuffer_end_atomic(batch); + + dri_bo_unreference(slice_batch_bo); +} + + +static VAStatus +gen8_mfc_avc_encode_picture(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + unsigned int rate_control_mode = encoder_context->rate_control_mode; + int current_frame_bits_size; + int sts; + + for (;;) { + gen8_mfc_init(ctx, encode_state, encoder_context); + intel_mfc_avc_prepare(ctx, encode_state, 
encoder_context); + /*Programing bcs pipeline*/ + gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline + gen8_mfc_run(ctx, encode_state, encoder_context); + if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) { + gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size); + sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size); + if (sts == BRC_NO_HRD_VIOLATION) { + intel_mfc_hrd_context_update(encode_state, mfc_context); + break; + } + else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) { + if (!mfc_context->hrd.violation_noted) { + fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow"); + mfc_context->hrd.violation_noted = 1; + } + return VA_STATUS_SUCCESS; + } + } else { + break; + } + } + + return VA_STATUS_SUCCESS; +} + +/* + * MPEG-2 + */ + +static const int +va_to_gen8_mpeg2_picture_type[3] = { + 1, /* I */ + 2, /* P */ + 3 /* B */ +}; + +static void +gen8_mfc_mpeg2_pic_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + struct encode_state *encode_state) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferMPEG2 *pic_param; + int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; + int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; + + assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer); + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; + + BEGIN_BCS_BATCH(batch, 13); + OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2)); + OUT_BCS_BATCH(batch, + (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */ + 
(pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */ + (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */ + (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */ + pic_param->picture_coding_extension.bits.intra_dc_precision << 14 | + pic_param->picture_coding_extension.bits.picture_structure << 12 | + pic_param->picture_coding_extension.bits.top_field_first << 11 | + pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 | + pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 | + pic_param->picture_coding_extension.bits.q_scale_type << 8 | + pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | + pic_param->picture_coding_extension.bits.alternate_scan << 6); + OUT_BCS_BATCH(batch, + 0 << 14 | /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */ + va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 | + 0); + OUT_BCS_BATCH(batch, + 1 << 31 | /* slice concealment */ + (height_in_mbs - 1) << 16 | + (width_in_mbs - 1)); + + if (slice_param && slice_param->quantiser_scale_code >= 14) + OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12)); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + 0xFFF << 16 | /* InterMBMaxSize */ + 0xFFF << 0 | /* IntraMBMaxSize */ + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + unsigned char intra_qm[64] = { + 8, 16, 19, 22, 26, 27, 29, 34, + 16, 16, 22, 24, 27, 29, 34, 37, + 19, 22, 26, 27, 29, 34, 34, 38, + 22, 22, 26, 27, 29, 34, 37, 40, + 22, 26, 27, 29, 32, 35, 40, 48, + 26, 27, 29, 32, 35, 40, 48, 58, + 26, 27, 29, 34, 38, 46, 56, 69, + 27, 29, 35, 38, 46, 56, 69, 83 + }; + + unsigned char non_intra_qm[64] = { + 16, 16, 16, 16, 16, 16, 16, 
16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16 + }; + + gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context); + gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context); +} + +static void +gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + unsigned short intra_fqm[64] = { + 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, + 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d, + 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23, + 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26, + 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e, + 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38, + 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45, + 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53, + }; + + unsigned short non_intra_fqm[64] = { + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, + }; + + 
gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context); + gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context); +} + +static void +gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + int x, int y, + int next_x, int next_y, + int is_fisrt_slice_group, + int is_last_slice_group, + int intra_slice, + int qp, + struct intel_batchbuffer *batch) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + if (batch == NULL) + batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, 8); + + OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2)); + OUT_BCS_BATCH(batch, + 0 << 31 | /* MbRateCtrlFlag */ + !!is_last_slice_group << 19 | /* IsLastSliceGrp */ + 1 << 17 | /* Insert Header before the first slice group data */ + 1 << 16 | /* SliceData PresentFlag: always 1 */ + 1 << 15 | /* TailPresentFlag: always 1 */ + 0 << 14 | /* FirstSliceHdrDisabled: slice header for each slice */ + !!intra_slice << 13 | /* IntraSlice */ + !!intra_slice << 12 | /* IntraSliceFlag */ + 0); + OUT_BCS_BATCH(batch, + next_y << 24 | + next_x << 16 | + y << 8 | + x << 0 | + 0); + OUT_BCS_BATCH(batch, qp); /* FIXME: SliceGroupQp */ + /* bitstream pointer is only loaded once for the first slice of a frame when + * LoadSlicePointerFlag is 0 + */ + OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset); + OUT_BCS_BATCH(batch, 0); /* FIXME: */ + OUT_BCS_BATCH(batch, 0); /* FIXME: CorrectPoints */ + OUT_BCS_BATCH(batch, 0); /* FIXME: CVxxx */ + + ADVANCE_BCS_BATCH(batch); +} + +static int +gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int mb_type, + int qp_scale_code, + int coded_block_pattern, + unsigned char 
target_size_in_word, + unsigned char max_size_in_word, + struct intel_batchbuffer *batch) +{ + int len_in_dwords = 9; + + if (batch == NULL) + batch = encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, len_in_dwords); + + OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2)); + OUT_BCS_BATCH(batch, + 0 << 24 | /* PackedMvNum */ + 0 << 20 | /* MvFormat */ + 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */ + 0 << 15 | /* TransformFlag: frame DCT */ + 0 << 14 | /* FieldMbFlag */ + 1 << 13 | /* IntraMbFlag */ + mb_type << 8 | /* MbType: Intra */ + 0 << 2 | /* SkipMbFlag */ + 0 << 0 | /* InterMbMode */ + 0); + OUT_BCS_BATCH(batch, y << 16 | x); + OUT_BCS_BATCH(batch, + max_size_in_word << 24 | + target_size_in_word << 16 | + coded_block_pattern << 6 | /* CBP */ + 0); + OUT_BCS_BATCH(batch, + last_mb_in_slice << 31 | + first_mb_in_slice << 30 | + 0 << 27 | /* EnableCoeffClamp */ + last_mb_in_slice_group << 26 | + 0 << 25 | /* MbSkipConvDisable */ + first_mb_in_slice_group << 24 | + 0 << 16 | /* MvFieldSelect */ + qp_scale_code << 0 | + 0); + OUT_BCS_BATCH(batch, 0); /* MV[0][0] */ + OUT_BCS_BATCH(batch, 0); /* MV[1][0] */ + OUT_BCS_BATCH(batch, 0); /* MV[0][1] */ + OUT_BCS_BATCH(batch, 0); /* MV[1][1] */ + + ADVANCE_BCS_BATCH(batch); + + return len_in_dwords; +} + +/* Byte offset */ +#define MPEG2_INTER_MV_OFFSET 48 + +static struct _mv_ranges +{ + int low; /* in the unit of 1/2 pixel */ + int high; /* in the unit of 1/2 pixel */ +} mv_ranges[] = { + {0, 0}, + {-16, 15}, + {-32, 31}, + {-64, 63}, + {-128, 127}, + {-256, 255}, + {-512, 511}, + {-1024, 1023}, + {-2048, 2047}, + {-4096, 4095} +}; + +static int +mpeg2_motion_vector(int mv, int pos, int display_max, int f_code) +{ + if (mv + pos * 16 * 2 < 0 || + mv + (pos + 1) * 16 * 2 > display_max * 2) + mv = 0; + + if (f_code > 0 && f_code < 10) { + if (mv < mv_ranges[f_code].low) + mv = mv_ranges[f_code].low; + + if (mv > mv_ranges[f_code].high) + mv = mv_ranges[f_code].high; + } + + return mv; +} + +static int 
+gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned int *msg, + int width_in_mbs, int height_in_mbs, + int x, int y, + int first_mb_in_slice, + int last_mb_in_slice, + int first_mb_in_slice_group, + int last_mb_in_slice_group, + int qp_scale_code, + unsigned char target_size_in_word, + unsigned char max_size_in_word, + struct intel_batchbuffer *batch) +{ + VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + int len_in_dwords = 9; + short *mvptr, mvx0, mvy0, mvx1, mvy1; + + if (batch == NULL) + batch = encoder_context->base.batch; + + mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);; + mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]); + mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]); + mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]); + mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]); + + BEGIN_BCS_BATCH(batch, len_in_dwords); + + OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2)); + OUT_BCS_BATCH(batch, + 2 << 24 | /* PackedMvNum */ + 7 << 20 | /* MvFormat */ + 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */ + 0 << 15 | /* TransformFlag: frame DCT */ + 0 << 14 | /* FieldMbFlag */ + 0 << 13 | /* IntraMbFlag */ + 1 << 8 | /* MbType: Frame-based */ + 0 << 2 | /* SkipMbFlag */ + 0 << 0 | /* InterMbMode */ + 0); + OUT_BCS_BATCH(batch, y << 16 | x); + OUT_BCS_BATCH(batch, + max_size_in_word << 24 | + target_size_in_word << 16 | + 0x3f << 6 | /* CBP */ + 0); + OUT_BCS_BATCH(batch, + last_mb_in_slice << 31 | + first_mb_in_slice << 30 | + 0 << 27 | /* EnableCoeffClamp */ + last_mb_in_slice_group << 26 | + 0 << 25 | /* MbSkipConvDisable */ + first_mb_in_slice_group << 24 | + 0 << 16 | /* MvFieldSelect */ + qp_scale_code << 0 | + 0); 
+ + OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16); /* MV[0][0] */ + OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16); /* MV[1][0] */ + OUT_BCS_BATCH(batch, 0); /* MV[0][1] */ + OUT_BCS_BATCH(batch, 0); /* MV[1][1] */ + + ADVANCE_BCS_BATCH(batch); + + return len_in_dwords; +} + +static void +intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS); + + if (encode_state->packed_header_data[idx]) { + VAEncPackedHeaderParameterBuffer *param = NULL; + unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer; + unsigned int length_in_bits; + + assert(encode_state->packed_header_param[idx]); + param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; + length_in_bits = param->bit_length; + + mfc_context->insert_object(ctx, + encoder_context, + header_data, + ALIGN(length_in_bits, 32) >> 5, + length_in_bits & 0x1f, + 5, /* FIXME: check it */ + 0, + 0, + 0, /* Needn't insert emulation bytes for MPEG-2 */ + slice_batch); + } + + idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS); + + if (encode_state->packed_header_data[idx]) { + VAEncPackedHeaderParameterBuffer *param = NULL; + unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer; + unsigned int length_in_bits; + + assert(encode_state->packed_header_param[idx]); + param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; + length_in_bits = param->bit_length; + + mfc_context->insert_object(ctx, + encoder_context, + header_data, + ALIGN(length_in_bits, 32) >> 5, + length_in_bits & 0x1f, + 5, /* FIXME: check it */ + 0, + 0, + 0, /* Needn't insert emulation bytes for MPEG-2 */ + slice_batch); + } +} + 
+static void +gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index, + VAEncSliceParameterBufferMPEG2 *next_slice_group_param, + struct intel_batchbuffer *slice_batch) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + VAEncSliceParameterBufferMPEG2 *slice_param = NULL; + unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0}; + unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0}; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + int i, j; + int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos; + unsigned int *msg = NULL; + unsigned char *msg_ptr = NULL; + + slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer; + h_start_pos = slice_param->macroblock_address % width_in_mbs; + v_start_pos = slice_param->macroblock_address / width_in_mbs; + assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs); + + dri_bo_map(vme_context->vme_output.bo , 0); + msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; + + if (next_slice_group_param) { + h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs; + v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs; + } else { + h_next_start_pos = 0; + v_next_start_pos = height_in_mbs; + } + + gen8_mfc_mpeg2_slicegroup_state(ctx, + encoder_context, + h_start_pos, + v_start_pos, + h_next_start_pos, + v_next_start_pos, + slice_index == 0, + next_slice_group_param == NULL, + slice_param->is_intra_slice, + 
slice_param->quantiser_scale_code, + slice_batch); + + if (slice_index == 0) + intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); + + /* Insert '00' to make sure the header is valid */ + mfc_context->insert_object(ctx, + encoder_context, + (unsigned int*)section_delimiter, + 1, + 8, /* 8bits in the last DWORD */ + 1, /* 1 byte */ + 1, + 0, + 0, + slice_batch); + + for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) { + /* PAK for each macroblocks */ + for (j = 0; j < slice_param->num_macroblocks; j++) { + int h_pos = (slice_param->macroblock_address + j) % width_in_mbs; + int v_pos = (slice_param->macroblock_address + j) / width_in_mbs; + int first_mb_in_slice = (j == 0); + int last_mb_in_slice = (j == slice_param->num_macroblocks - 1); + int first_mb_in_slice_group = (i == 0 && j == 0); + int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 && + j == slice_param->num_macroblocks - 1); + + msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block); + + if (slice_param->is_intra_slice) { + gen8_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); + } else { + int inter_rdo, intra_rdo; + inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; + intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; + + if (intra_rdo < inter_rdo) + gen8_mfc_mpeg2_pak_object_intra(ctx, + encoder_context, + h_pos, v_pos, + first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + 0x1a, + slice_param->quantiser_scale_code, + 0x3f, + 0, + 0xff, + slice_batch); + else + gen8_mfc_mpeg2_pak_object_inter(ctx, + encode_state, + encoder_context, + msg, + width_in_mbs, height_in_mbs, + h_pos, v_pos, + 
first_mb_in_slice, + last_mb_in_slice, + first_mb_in_slice_group, + last_mb_in_slice_group, + slice_param->quantiser_scale_code, + 0, + 0xff, + slice_batch); + } + } + + slice_param++; + } + + dri_bo_unmap(vme_context->vme_output.bo); + + /* tail data */ + if (next_slice_group_param == NULL) { /* end of a picture */ + mfc_context->insert_object(ctx, + encoder_context, + (unsigned int *)tail_delimiter, + 2, + 8, /* 8bits in the last DWORD */ + 5, /* 5 bytes */ + 1, + 1, + 0, + slice_batch); + } else { /* end of a lsice group */ + mfc_context->insert_object(ctx, + encoder_context, + (unsigned int *)section_delimiter, + 1, + 8, /* 8bits in the last DWORD */ + 1, /* 1 byte */ + 1, + 1, + 0, + slice_batch); + } +} + +/* + * A batch buffer for all slices, including slice state, + * slice insert object and slice pak object commands + * + */ +static dri_bo * +gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch; + VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL; + dri_bo *batch_bo; + int i; + + batch = mfc_context->aux_batchbuffer; + batch_bo = batch->buffer; + + for (i = 0; i < encode_state->num_slice_params_ext; i++) { + if (i == encode_state->num_slice_params_ext - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer; + + gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch); + } + + intel_batchbuffer_align(batch, 8); + + BEGIN_BCS_BATCH(batch, 2); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(batch); + + dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; + + 
return batch_bo; +} + +static void +gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context); + mfc_context->set_surface_state(ctx, encoder_context); + mfc_context->ind_obj_base_addr_state(ctx, encoder_context); + gen8_mfc_pipe_buf_addr_state(ctx, encoder_context); + gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context); + gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state); + gen8_mfc_mpeg2_qm_state(ctx, encoder_context); + gen8_mfc_mpeg2_fqm_state(ctx, encoder_context); +} + +static void +gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + dri_bo *slice_batch_bo; + + slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context); + + // begin programing + intel_batchbuffer_start_atomic_bcs(batch, 0x4000); + intel_batchbuffer_emit_mi_flush(batch); + + // picture level programing + gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context); + + BEGIN_BCS_BATCH(batch, 4); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_BCS_RELOC(batch, + slice_batch_bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); + + // end programing + intel_batchbuffer_end_atomic(batch); + + dri_bo_unreference(slice_batch_bo); +} + +static VAStatus +intel_mfc_mpeg2_prepare(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct object_surface *obj_surface; + struct object_buffer *obj_buffer; + struct 
i965_coded_buffer_segment *coded_buffer_segment; + VAStatus vaStatus = VA_STATUS_SUCCESS; + dri_bo *bo; + int i; + + /* reconstructed surface */ + obj_surface = encode_state->reconstructed_object; + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); + mfc_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(mfc_context->pre_deblocking_output.bo); + mfc_context->surface_state.width = obj_surface->orig_width; + mfc_context->surface_state.height = obj_surface->orig_height; + mfc_context->surface_state.w_pitch = obj_surface->width; + mfc_context->surface_state.h_pitch = obj_surface->height; + + /* forward reference */ + obj_surface = encode_state->reference_objects[0]; + + if (obj_surface && obj_surface->bo) { + mfc_context->reference_surfaces[0].bo = obj_surface->bo; + dri_bo_reference(mfc_context->reference_surfaces[0].bo); + } else + mfc_context->reference_surfaces[0].bo = NULL; + + /* backward reference */ + obj_surface = encode_state->reference_objects[1]; + + if (obj_surface && obj_surface->bo) { + mfc_context->reference_surfaces[1].bo = obj_surface->bo; + dri_bo_reference(mfc_context->reference_surfaces[1].bo); + } else { + mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo; + + if (mfc_context->reference_surfaces[1].bo) + dri_bo_reference(mfc_context->reference_surfaces[1].bo); + } + + for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { + mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo; + + if (mfc_context->reference_surfaces[i].bo) + dri_bo_reference(mfc_context->reference_surfaces[i].bo); + } + + /* input YUV surface */ + obj_surface = encode_state->input_yuv_object; + mfc_context->uncompressed_picture_source.bo = obj_surface->bo; + dri_bo_reference(mfc_context->uncompressed_picture_source.bo); + + /* coded buffer */ + obj_buffer = encode_state->coded_buf_object; + bo = obj_buffer->buffer_store->bo; + 
mfc_context->mfc_indirect_pak_bse_object.bo = bo; + mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE; + mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000); + dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo); + + /* set the internal flag to 0 to indicate the coded size is unknown */ + dri_bo_map(bo, 1); + coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; + coded_buffer_segment->mapped = 0; + coded_buffer_segment->codec = encoder_context->codec; + dri_bo_unmap(bo); + + return vaStatus; +} + +static VAStatus +gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_mfc_init(ctx, encode_state, encoder_context); + intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context); + /*Programing bcs pipeline*/ + gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context); + gen8_mfc_run(ctx, encode_state, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static void +gen8_mfc_context_destroy(void *context) +{ + struct gen6_mfc_context *mfc_context = context; + int i; + + dri_bo_unreference(mfc_context->post_deblocking_output.bo); + mfc_context->post_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->pre_deblocking_output.bo); + mfc_context->pre_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->uncompressed_picture_source.bo); + mfc_context->uncompressed_picture_source.bo = NULL; + + dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); + mfc_context->mfc_indirect_pak_bse_object.bo = NULL; + + for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){ + dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo); + mfc_context->direct_mv_buffers[i].bo = NULL; + } + + dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo); + mfc_context->intra_row_store_scratch_buffer.bo = NULL; + + 
dri_bo_unreference(mfc_context->macroblock_status_buffer.bo); + mfc_context->macroblock_status_buffer.bo = NULL; + + dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo); + mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo); + mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL; + + + for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){ + dri_bo_unreference(mfc_context->reference_surfaces[i].bo); + mfc_context->reference_surfaces[i].bo = NULL; + } + + i965_gpe_context_destroy(&mfc_context->gpe_context); + + dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo); + mfc_context->mfc_batchbuffer_surface.bo = NULL; + + dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->aux_batchbuffer_surface.bo = NULL; + + if (mfc_context->aux_batchbuffer) + intel_batchbuffer_free(mfc_context->aux_batchbuffer); + + mfc_context->aux_batchbuffer = NULL; + + free(mfc_context); +} + +static VAStatus gen8_mfc_pipeline(VADriverContextP ctx, + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + VAStatus vaStatus; + + switch (profile) { + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context); + break; + + /* FIXME: add for other profile */ + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context); + break; + + default: + vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + break; + } + + return vaStatus; +} + +Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context)); + + 
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + + mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS; + mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); + + mfc_context->gpe_context.curbe.length = 32 * 4; + + mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + mfc_context->gpe_context.vfe_state.num_urb_entries = 16; + mfc_context->gpe_context.vfe_state.gpgpu_mode = 0; + mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; + mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1; + + i965_gpe_load_kernels(ctx, + &mfc_context->gpe_context, + gen8_mfc_kernels, + NUM_MFC_KERNEL); + + mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select; + mfc_context->set_surface_state = gen8_mfc_surface_state; + mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state; + mfc_context->avc_img_state = gen8_mfc_avc_img_state; + mfc_context->avc_qm_state = gen8_mfc_avc_qm_state; + mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state; + mfc_context->insert_object = gen8_mfc_avc_insert_object; + mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup; + + encoder_context->mfc_context = mfc_context; + encoder_context->mfc_context_destroy = gen8_mfc_context_destroy; + encoder_context->mfc_pipeline = gen8_mfc_pipeline; + encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare; + + return True; +} diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c new file mode 100644 index 0000000..b482846 --- /dev/null +++ b/src/gen8_mfd.c @@ -0,0 +1,3190 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, 
and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Xiang Haihao <haihao.xiang@intel.com> + * Zhao Yakui <yakui.zhao@intel.com> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <math.h> +#include <va/va_dec_jpeg.h> +#include <va/va_dec_vp8.h> + +#include "intel_batchbuffer.h" +#include "intel_driver.h" + +#include "i965_defines.h" +#include "i965_drv_video.h" +#include "i965_decoder_utils.h" + +#include "gen7_mfd.h" +#include "intel_media.h" + +#define B0_STEP_REV 2 +#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) + +static const uint32_t zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +static void +gen8_mfd_init_avc_surface(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + GenAvcSurface *gen7_avc_surface = obj_surface->private_data; + int width_in_mbs, 
height_in_mbs; + + obj_surface->free_private_data = gen_free_avc_surface; + width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ + + if (!gen7_avc_surface) { + gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1); + gen7_avc_surface->frame_store_id = -1; + assert((obj_surface->size & 0x3f) == 0); + obj_surface->private_data = gen7_avc_surface; + } + + /* DMV buffers now relate to the whole frame, irrespective of + field coding modes */ + if (gen7_avc_surface->dmv_top == NULL) { + gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr, + "direct mv w/r buffer", + width_in_mbs * height_in_mbs * 128, + 0x1000); + assert(gen7_avc_surface->dmv_top); + } +} + +static void +gen8_mfd_pipe_mode_select(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + assert(standard_select == MFX_FORMAT_MPEG2 || + standard_select == MFX_FORMAT_AVC || + standard_select == MFX_FORMAT_VC1 || + standard_select == MFX_FORMAT_JPEG || + standard_select == MFX_FORMAT_VP8); + + BEGIN_BCS_BATCH(batch, 5); + OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); + OUT_BCS_BATCH(batch, + (MFX_LONG_MODE << 17) | /* Currently only support long format */ + (MFD_MODE_VLD << 15) | /* VLD mode */ + (0 << 10) | /* disable Stream-Out */ + (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */ + (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */ + (0 << 5) | /* not in stitch mode */ + (MFX_CODEC_DECODE << 4) | /* decoding mode */ + (standard_select << 0)); + OUT_BCS_BATCH(batch, + (0 << 4) | /* terminate if AVC motion and POC table error occurs */ + (0 << 3) | /* terminate if AVC mbdata error occurs */ + (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ + (0 << 1) | + (0 << 0)); + 
OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ + OUT_BCS_BATCH(batch, 0); /* reserved */ + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_surface_state(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + struct object_surface *obj_surface = decode_state->render_object; + unsigned int y_cb_offset; + unsigned int y_cr_offset; + unsigned int surface_format; + + assert(obj_surface); + + y_cb_offset = obj_surface->y_cb_offset; + y_cr_offset = obj_surface->y_cr_offset; + + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + ((obj_surface->orig_height - 1) << 18) | + ((obj_surface->orig_width - 1) << 4)); + OUT_BCS_BATCH(batch, + (surface_format << 28) | /* 420 planar YUV surface */ + ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ + (0 << 22) | /* surface object control state, ignored */ + ((obj_surface->width - 1) << 3) | /* pitch */ + (0 << 2) | /* must be 0 */ + (1 << 1) | /* must be tiled */ + (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* X offset for U(Cb), must be 0 */ + (y_cb_offset << 0)); /* Y offset for U(Cb) */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* X offset for V(Cr), must be 0 */ + (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */ + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx, + struct decode_state *decode_state, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int i; + + BEGIN_BCS_BATCH(batch, 61); + OUT_BCS_BATCH(batch, 
MFX_PIPE_BUF_ADDR_STATE | (61 - 2)); + /* Pre-deblock 1-3 */ + if (gen7_mfd_context->pre_deblocking_output.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* Post-debloing 4-6 */ + if (gen7_mfd_context->post_deblocking_output.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* uncompressed-video & stream out 7-12 */ + OUT_BCS_BATCH(batch, 0); /* ignore for decoding */ + OUT_BCS_BATCH(batch, 0); /* ignore for decoding */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* intra row-store scratch 13-15 */ + if (gen7_mfd_context->intra_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* deblocking-filter-row-store 16-18 */ + if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* DW 19..50 */ + for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { + struct object_surface *obj_surface; + + if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID && + gen7_mfd_context->reference_surface[i].obj_surface && + gen7_mfd_context->reference_surface[i].obj_surface->bo) { + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; + + 
OUT_BCS_RELOC(batch, obj_surface->bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + } else { + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, 0); + } + + /* reference property 51 */ + OUT_BCS_BATCH(batch, 0); + + /* Macroblock status & ILDB 52-57 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the second Macroblock status 58-60 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx, + dri_bo *slice_data_bo, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 26); + OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); + /* MFX In BS 1-5 */ + OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* Upper bound 4-5 */ + OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ + OUT_BCS_BATCH(batch, 0); + + /* MFX indirect MV 6-10 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* MFX IT_COFF 11-15 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* MFX IT_DBLK 16-20 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* MFX PAK_BSE object for encoder 21-25 */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx, + 
struct decode_state *decode_state, + int standard_select, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2)); + + if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* MPR Row Store Scratch buffer 4-6 */ + if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + else + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* Bitplane 7-9 */ + if (gen7_mfd_context->bitplane_read_buffer.valid) + OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + else + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_qm_state(VADriverContextP ctx, + int qm_type, + unsigned char *qm, + int qm_length, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + unsigned int qm_buffer[16]; + + assert(qm_length <= 16 * 4); + memcpy(qm_buffer, qm, qm_length); + + BEGIN_BCS_BATCH(batch, 18); + OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2)); + OUT_BCS_BATCH(batch, qm_type << 0); + intel_batchbuffer_data(batch, qm_buffer, 16 * 4); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_avc_img_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int img_struct; + int mbaff_frame_flag; + unsigned int 
width_in_mbs, height_in_mbs; + VAPictureParameterBufferH264 *pic_param; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID)); + + if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD) + img_struct = 1; + else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD) + img_struct = 3; + else + img_struct = 0; + + if ((img_struct & 0x1) == 0x1) { + assert(pic_param->pic_fields.bits.field_pic_flag == 0x1); + } else { + assert(pic_param->pic_fields.bits.field_pic_flag == 0x0); + } + + if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */ + assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0); + assert(pic_param->pic_fields.bits.field_pic_flag == 0); + } else { + assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */ + } + + mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag && + !pic_param->pic_fields.bits.field_pic_flag); + + width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ + + /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */ + assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */ + pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */ + assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */ + + BEGIN_BCS_BATCH(batch, 17); + OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2)); + OUT_BCS_BATCH(batch, + (width_in_mbs * height_in_mbs - 1)); + OUT_BCS_BATCH(batch, + ((height_in_mbs - 1) << 16) | + ((width_in_mbs - 1) << 0)); + OUT_BCS_BATCH(batch, + ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) | + ((pic_param->chroma_qp_index_offset & 0x1f) << 16) | + (0 << 14) | /* Max-bit conformance Intra 
flag ??? FIXME */ + (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */ + (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */ + (pic_param->pic_fields.bits.weighted_bipred_idc << 10) | + (img_struct << 8)); + OUT_BCS_BATCH(batch, + (pic_param->seq_fields.bits.chroma_format_idc << 10) | + (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) | + ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) | + (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) | + (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) | + (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) | + (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) | + (mbaff_frame_flag << 1) | + (pic_param->pic_fields.bits.field_pic_flag << 0)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_avc_qm_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAIQMatrixBufferH264 *iq_matrix; + VAPictureParameterBufferH264 *pic_param; + + if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) + iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer; + else + iq_matrix = &gen7_mfd_context->iq_matrix.h264; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + + gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context); + gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context); + + if 
(pic_param->pic_fields.bits.transform_8x8_mode_flag) { + gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context); + gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context); + } +} + +static inline void +gen8_mfd_avc_picid_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen75_send_avc_picid_state(gen7_mfd_context->base.batch, + gen7_mfd_context->reference_surface); +} + +static void +gen8_mfd_avc_directmode_state(VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + struct object_surface *obj_surface; + GenAvcSurface *gen7_avc_surface; + VAPictureH264 *va_pic; + int i; + + BEGIN_BCS_BATCH(batch, 71); + OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); + + /* reference surfaces 0..15 */ + for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { + if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID && + gen7_mfd_context->reference_surface[i].obj_surface && + gen7_mfd_context->reference_surface[i].obj_surface->private_data) { + + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; + gen7_avc_surface = obj_surface->private_data; + + OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BCS_BATCH(batch, 0); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + } + + OUT_BCS_BATCH(batch, 0); + + /* the current decoding frame/field */ + va_pic = &pic_param->CurrPic; + obj_surface = decode_state->render_object; + assert(obj_surface->bo && obj_surface->private_data); + gen7_avc_surface = obj_surface->private_data; + + OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top, + 
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* POC List */ + for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { + obj_surface = gen7_mfd_context->reference_surface[i].obj_surface; + + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); + + assert(va_pic != NULL); + OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + } + + va_pic = &pic_param->CurrPic; + OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch); +} + +static void +gen8_mfd_avc_slice_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; + int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos; + int num_ref_idx_l0, num_ref_idx_l1; + int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag && + pic_param->seq_fields.bits.mb_adaptive_frame_field_flag); + int first_mb_in_slice = 0, first_mb_in_next_slice = 0; + int slice_type; + + if (slice_param->slice_type == SLICE_TYPE_I || + slice_param->slice_type == SLICE_TYPE_SI) { + 
slice_type = SLICE_TYPE_I; + } else if (slice_param->slice_type == SLICE_TYPE_P || + slice_param->slice_type == SLICE_TYPE_SP) { + slice_type = SLICE_TYPE_P; + } else { + assert(slice_param->slice_type == SLICE_TYPE_B); + slice_type = SLICE_TYPE_B; + } + + if (slice_type == SLICE_TYPE_I) { + assert(slice_param->num_ref_idx_l0_active_minus1 == 0); + assert(slice_param->num_ref_idx_l1_active_minus1 == 0); + num_ref_idx_l0 = 0; + num_ref_idx_l1 = 0; + } else if (slice_type == SLICE_TYPE_P) { + assert(slice_param->num_ref_idx_l1_active_minus1 == 0); + num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_idx_l1 = 0; + } else { + num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; + num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; + } + + first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture; + slice_hor_pos = first_mb_in_slice % width_in_mbs; + slice_ver_pos = first_mb_in_slice / width_in_mbs; + + if (next_slice_param) { + first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture; + next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; + next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs; + } else { + next_slice_hor_pos = 0; + next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag); + } + + BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? 
*/ + OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); + OUT_BCS_BATCH(batch, slice_type); + OUT_BCS_BATCH(batch, + (num_ref_idx_l1 << 24) | + (num_ref_idx_l0 << 16) | + (slice_param->chroma_log2_weight_denom << 8) | + (slice_param->luma_log2_weight_denom << 0)); + OUT_BCS_BATCH(batch, + (slice_param->direct_spatial_mv_pred_flag << 29) | + (slice_param->disable_deblocking_filter_idc << 27) | + (slice_param->cabac_init_idc << 24) | + ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) | + ((slice_param->slice_beta_offset_div2 & 0xf) << 8) | + ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0)); + OUT_BCS_BATCH(batch, + (slice_ver_pos << 24) | + (slice_hor_pos << 16) | + (first_mb_in_slice << 0)); + OUT_BCS_BATCH(batch, + (next_slice_ver_pos << 16) | + (next_slice_hor_pos << 0)); + OUT_BCS_BATCH(batch, + (next_slice_param == NULL) << 19); /* last slice flag */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static inline void +gen8_mfd_avc_ref_idx_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen6_send_avc_ref_idx_state( + gen7_mfd_context->base.batch, + slice_param, + gen7_mfd_context->reference_surface + ); +} + +static void +gen8_mfd_avc_weightoffset_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int i, j, num_weight_offset_table = 0; + short weightoffsets[32 * 6]; + + if ((slice_param->slice_type == SLICE_TYPE_P || + slice_param->slice_type == SLICE_TYPE_SP) && + (pic_param->pic_fields.bits.weighted_pred_flag == 1)) { + num_weight_offset_table = 1; + } + + if ((slice_param->slice_type == SLICE_TYPE_B) && + 
(pic_param->pic_fields.bits.weighted_bipred_idc == 1)) { + num_weight_offset_table = 2; + } + + for (i = 0; i < num_weight_offset_table; i++) { + BEGIN_BCS_BATCH(batch, 98); + OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2)); + OUT_BCS_BATCH(batch, i); + + if (i == 0) { + for (j = 0; j < 32; j++) { + weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j]; + weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j]; + weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0]; + weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0]; + weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1]; + weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1]; + } + } else { + for (j = 0; j < 32; j++) { + weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j]; + weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j]; + weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0]; + weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0]; + weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1]; + weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1]; + } + } + + intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets)); + ADVANCE_BCS_BATCH(batch); + } +} + +static void +gen8_mfd_avc_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *slice_param, + dri_bo *slice_data_bo, + VASliceParameterBufferH264 *next_slice_param, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo, + slice_param, + pic_param->pic_fields.bits.entropy_coding_mode_flag); + + /* the input bitsteam format on GEN7 differs from GEN6 */ + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(batch, + (slice_param->slice_data_size)); + OUT_BCS_BATCH(batch, slice_param->slice_data_offset); + 
OUT_BCS_BATCH(batch, + (0 << 31) | + (0 << 14) | + (0 << 12) | + (0 << 10) | + (0 << 8)); + OUT_BCS_BATCH(batch, + ((slice_data_bit_offset >> 3) << 16) | + (1 << 7) | + (0 << 5) | + (0 << 4) | + ((next_slice_param == NULL) << 3) | /* LastSlice Flag */ + (slice_data_bit_offset & 0x7)); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static inline void +gen8_mfd_avc_context_init( + VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context +) +{ + /* Initialize flat scaling lists */ + avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264); +} + +static void +gen8_mfd_avc_decode_init(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAPictureParameterBufferH264 *pic_param; + VASliceParameterBufferH264 *slice_param; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct object_surface *obj_surface; + dri_bo *bo; + int i, j, enable_avc_ildb = 0; + unsigned int width_in_mbs, height_in_mbs; + + for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + assert((slice_param->slice_type == SLICE_TYPE_I) || + (slice_param->slice_type == SLICE_TYPE_SI) || + (slice_param->slice_type == SLICE_TYPE_P) || + (slice_param->slice_type == SLICE_TYPE_SP) || + (slice_param->slice_type == SLICE_TYPE_B)); + + if (slice_param->disable_deblocking_filter_idc != 1) { + enable_avc_ildb = 1; + break; + } + + slice_param++; + } + } + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + gen75_update_avc_frame_store_index(ctx, decode_state, pic_param, + 
gen7_mfd_context->reference_surface); + width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; + assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ + assert(height_in_mbs > 0 && height_in_mbs <= 256); + + /* Current decoded picture */ + obj_surface = decode_state->render_object; + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; + + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); + gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface); + + dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb; + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb; + + dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "intra row store", + width_in_mbs * 64, + 0x1000); + assert(bo); + gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "deblocking filter row store", + width_in_mbs * 64 * 4, + 0x1000); + assert(bo); + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + width_in_mbs * 64 * 2, + 
0x1000); + assert(bo); + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "mpr row store", + width_in_mbs * 64 * 2, + 0x1000); + assert(bo); + gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1; + + gen7_mfd_context->bitplane_read_buffer.valid = 0; +} + +static void +gen8_mfd_avc_decode_picture(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferH264 *pic_param; + VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param; + dri_bo *slice_data_bo; + int i, j; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context); + + intel_batchbuffer_start_atomic_bcs(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); + gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); + gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); + gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); + gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + slice_data_bo = 
decode_state->slice_datas[j]->bo; + gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context); + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + + if (j == 0 && slice_param->first_mb_in_slice) + gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + assert((slice_param->slice_type == SLICE_TYPE_I) || + (slice_param->slice_type == SLICE_TYPE_SI) || + (slice_param->slice_type == SLICE_TYPE_P) || + (slice_param->slice_type == SLICE_TYPE_SP) || + (slice_param->slice_type == SLICE_TYPE_B)); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context); + gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context); + gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context); + gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context); + gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context); + slice_param++; + } + } + + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); +} + +static void +gen8_mfd_mpeg2_decode_init(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAPictureParameterBufferMPEG2 *pic_param; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct object_surface *obj_surface; + dri_bo *bo; + unsigned int width_in_mbs; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferMPEG2 
*)decode_state->pic_param->buffer; + width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16; + + mpeg2_set_reference_surfaces( + ctx, + gen7_mfd_context->reference_surface, + decode_state, + pic_param + ); + + /* Current decoded picture */ + obj_surface = decode_state->render_object; + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.valid = 1; + + dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + width_in_mbs * 96, + 0x1000); + assert(bo); + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1; + + gen7_mfd_context->post_deblocking_output.valid = 0; + gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0; + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0; + gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0; + gen7_mfd_context->bitplane_read_buffer.valid = 0; +}

/*
 * Emit the 13-dword MFX_MPEG2_PIC_STATE command for the current picture.
 *
 * DW1 packs the four f_code nibbles and the picture_coding_extension bits,
 * DW2 carries the picture coding type, DW3 carries the picture size in
 * macroblocks (minus one) plus the slice-concealment-disable bit, and the
 * remaining nine dwords are written as zero.
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    /* This path always sets the bit (DW3, bit 31). */
    const unsigned int slice_concealment_disable_bit = 1;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}

/*
 * Refresh the context's cached MPEG-2 quantiser matrices from the
 * application's IQ matrix buffer (if one was supplied) and commit the
 * cached intra / non-intra matrices to the hardware.
 *
 * A cached load_* flag of -1 is treated as "never set": the flag is then
 * taken from the incoming buffer even when the incoming flag is 0.
 * Incoming coefficients are stored at position zigzag_direct[j].
 */
static void
gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
    int i, j;

    /* Update internal QM state */
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
        VAIQMatrixBufferMPEG2 * const iq_matrix =
            (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;

        if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
            iq_matrix->load_intra_quantiser_matrix) {
            gen_iq_matrix->load_intra_quantiser_matrix =
                iq_matrix->load_intra_quantiser_matrix;
            if (iq_matrix->load_intra_quantiser_matrix) {
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->intra_quantiser_matrix[j];
            }
        }

        if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
            iq_matrix->load_non_intra_quantiser_matrix) {
            gen_iq_matrix->load_non_intra_quantiser_matrix =
                iq_matrix->load_non_intra_quantiser_matrix;
            if (iq_matrix->load_non_intra_quantiser_matrix) {
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->non_intra_quantiser_matrix[j];
            }
        }
    }

    /* Commit QM state to HW */
    for (i = 0; i < 2; i++) {
        unsigned char *qm = NULL;
        /* Only meaningful when qm != NULL; initialised to keep it defined. */
        int qm_type = 0;

        if (i == 0) {
            if (gen_iq_matrix->load_intra_quantiser_matrix) {
                qm = gen_iq_matrix->intra_quantiser_matrix;
                qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
            }
        } else {
            if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
                qm = gen_iq_matrix->non_intra_quantiser_matrix;
                qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
            }
        }

        if (!qm)
            continue;

        gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
    }
}

/*
 * Emit one 5-dword MFD_MPEG2_BSD_OBJECT command for a slice.
 *
 * next_slice_param is the slice that follows in decode order, or NULL for
 * the last slice of the picture; it supplies the end position used to
 * derive the macroblock count.  For field pictures the slice vertical
 * positions are halved when the context's
 * wa_mpeg2_slice_vertical_position workaround flag is > 0.
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: it ends at the bottom of the (field) picture. */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}

/*
 * Build and flush the BCS batch that decodes one MPEG-2 picture:
 * per-picture state first, then one BSD object per slice.  Each slice is
 * given a pointer to its successor, which may live in the next slice
 * parameter buffer; the last slice of the picture gets NULL.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* A negative value means the workaround has not been evaluated yet. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}

/* VA picture_type (index) -> hardware picture type; index 4 maps to P. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* VA mv_mode / mv_mode2 (index) -> unified MV mode field. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};

/* Scale factor indexed by b_picture_fraction (valid indices 0..20). */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* VA conditional_overlap_flag (index, 0..2) -> hardware CONDOVER field. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* VA sequence profile (index) -> hardware profile. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};

/*
 * Release the per-surface VC-1 private data: drop the reference on the
 * direct-MV buffer, free the struct and clear the caller's pointer.
 * A NULL *data is a no-op.
 */
static void
gen8_mfd_free_vc1_surface(void **data)
{
    struct gen7_vc1_surface *gen7_vc1_surface = *data;

    if (!gen7_vc1_surface)
        return;

    dri_bo_unreference(gen7_vc1_surface->dmv);
    free(gen7_vc1_surface);
    *data = NULL;
}

/*
 * Attach (allocating on first use) the VC-1 private data of a surface,
 * record the current picture type in it, and make sure it owns a
 * direct-MV buffer of 64 bytes per macroblock.
 *
 * If the private data cannot be allocated the function returns with
 * obj_surface->private_data left NULL instead of dereferencing NULL.
 */
static void
gen8_mfd_init_vc1_surface(VADriverContextP ctx,
                          VAPictureParameterBufferVC1 *pic_param,
                          struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
    int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;

    obj_surface->free_private_data = gen8_mfd_free_vc1_surface;

    if (!gen7_vc1_surface) {
        gen7_vc1_surface = calloc(1, sizeof(*gen7_vc1_surface));
        assert((obj_surface->size & 0x3f) == 0);

        /* Out of memory: leave the surface without private data. */
        if (!gen7_vc1_surface)
            return;

        obj_surface->private_data = gen7_vc1_surface;
    }

    gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;

    if (gen7_vc1_surface->dmv == NULL) {
        gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
                                             "direct mv w/r buffer",
                                             width_in_mbs * height_in_mbs * 64,
                                             0x1000);
    }
}

/*
 * Prepare the context for decoding one VC-1 picture: update the reference
 * frame store, make sure the render target has an NV12 buffer and VC-1
 * private data, point the pre/post deblocking outputs at the render
 * target (exactly one is marked valid, selected by the loopfilter bit),
 * (re)allocate the row-store scratch buffers, and, when any bitplane is
 * present, repack the application's bitplane buffer into a freshly
 * allocated buffer object.
 */
static void
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Two macroblocks (one nibble each) per destination byte. */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane && decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for (src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                /*
                 * The source is a continuous nibble stream over the whole
                 * picture; the even macroblock of a pair sits in the high
                 * nibble.
                 */
                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
                    src_value |= 0x2;
                }

                /*
                 * Shift the previous nibble down and insert the new one on
                 * top, so each destination byte ends up with the even
                 * macroblock in its low nibble.
                 */
                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd row width: the last byte received only one nibble. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}

/*
 * Emit the 6-dword MFD_VC1_LONG_PIC_STATE command for the current
 * picture, deriving the hardware fields (alternative quantizer
 * configuration, unified MV mode, scale factor, overlap flag,
 * interpolation mode, frame coding mode, ...) from the VA picture
 * parameters.
 *
 * Side effect: when variable-sized transform is disabled for a non-I/BI
 * picture, mb_level_transform_type_flag and frame_level_transform_type
 * are overwritten in the caller's picture parameter buffer.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0:
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    /* Guard the table lookup: va_to_gen7_vc1_pic_type has five entries. */
    assert(pic_param->picture_fields.bits.picture_type < 5);
    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
            pic_param->transform_fields.bits.frame_level_transform_type = 0;
        }
    }

    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        /* Direct MVs are unusable when reference 1 is missing or is I/BI. */
        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /*
     * NOTE(review): brfd is computed here but is not written into any of
     * the dwords emitted below; both distance fields in DW4 use
     * reference_distance directly.  Left as-is pending confirmation of
     * the intended hardware field.
     */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Validate the table indices before they are used below. */
    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    overlap = 0;
    if (profile != GEN7_VC1_ADVANCED_PROFILE) {
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
            overlap = 1;
        }
    } else {
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
            pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
            overlap = 1;
        }
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                overlap = 1;
            } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                       va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                overlap = 1;
            }
        }
    }

    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}

/*
 * Emit the 6-dword MFX_VC1_PRED_PIPE_STATE command.  The intensity
 * compensation bits (DW1 bits 10 and 8) are set when mv_mode is
 * VAMvModeIntensityCompensation; DW2 carries luma_shift / luma_scale.
 */
static void
gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    int intensitycomp_single;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  0 << 14 | /* FIXME: double ??? */
                  0 << 12 |
                  intensitycomp_single << 10 |
                  intensitycomp_single << 8 |
                  0 << 4 | /* FIXME: interlace mode */
                  0);
    OUT_BCS_BATCH(batch,
                  pic_param->luma_shift << 16 |
                  pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}

/*
 * Emit the 7-dword MFX_VC1_DIRECTMODE_STATE command.  The write buffer is
 * the direct-MV buffer of the render target, the read buffer the one of
 * reference_objects[1]; a zero address is emitted for whichever surface
 * has no VC-1 private data.
 */
static void
gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;

    obj_surface = decode_state->render_object;

    if (obj_surface && obj_surface->private_data) {
        dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    obj_surface = decode_state->reference_objects[1];

    if (obj_surface && obj_surface->private_data) {
        dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    BEGIN_BCS_BATCH(batch, 7);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));

    if (dmv_write_buffer)
        OUT_BCS_RELOC(batch, dmv_write_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    if (dmv_read_buffer)
        OUT_BCS_RELOC(batch, dmv_read_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

/*
 * Translate a macroblock bit offset into an offset within the raw slice
 * bytes.
 *
 * For profiles other than 3 the offset is returned unchanged.  For
 * profile 3 the first in_slice_data_bit_offset / 8 header bytes are
 * walked and one extra raw byte is accounted for each time the pattern
 * 00 00 03 followed by a byte < 4 is seen (presumably the bitstream's
 * emulation-prevention byte - confirm against the VC-1 spec).
 *
 * NOTE(review): the scan reads up to buf[j + 3] with no length argument;
 * the caller must guarantee that many readable bytes.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int out_slice_data_bit_offset;
    int slice_header_size = in_slice_data_bit_offset / 8;
    int i, j;

    if (profile != 3)
        out_slice_data_bit_offset = in_slice_data_bit_offset;
    else {
        /* i counts header bytes, j counts raw bytes. */
        for (i = 0, j = 0; i < slice_header_size; i++, j++) {
            if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
                i++, j += 2;
            }
        }

        out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
    }

    return out_slice_data_bit_offset;
}

/*
 * Emit one 5-dword MFD_VC1_BSD_OBJECT command for a slice.  The slice
 * data buffer is mapped briefly so the macroblock bit offset can be
 * corrected by gen8_mfd_vc1_get_macroblock_bit_offset().  The slice ends
 * at the next slice's vertical position, or at the picture height in
 * macroblocks when next_slice_param is NULL.
 */
static void
gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    dri_bo_map(slice_data_bo, 0);
    assert(slice_data_bo->virtual);
    /* Cast before the addition: arithmetic on void * is a GNU extension. */
    slice_data = (uint8_t *)slice_data_bo->virtual + slice_param->slice_data_offset;
    macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}

/*
 * Build and flush the BCS batch that decodes one VC-1 picture:
 * per-picture state first, then one BSD object per slice.  Each slice is
 * given a pointer to its successor, which may live in the next slice
 * parameter buffer; the last slice of the picture gets NULL.
 */
static void
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}

/*
 * Prepare the context for decoding one baseline JPEG picture: choose the
 * surface fourcc / subsampling from the component sampling factors, make
 * sure the render target has a matching buffer, and route the
 * pre-deblocking output to it.  All other auxiliary buffers are marked
 * invalid.
 */
static void
gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    int subsampling = SUBSAMPLE_YUV420;
    int fourcc = VA_FOURCC_IMC3;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    if (pic_param->num_components == 1)
        subsampling = SUBSAMPLE_YUV400;
    else if (pic_param->num_components == 3) {
        int h1 = pic_param->components[0].h_sampling_factor;
        int h2 = pic_param->components[1].h_sampling_factor;
        int h3 = pic_param->components[2].h_sampling_factor;
        int v1 = pic_param->components[0].v_sampling_factor;
        int v2 = pic_param->components[1].v_sampling_factor;
        int v3 = pic_param->components[2].v_sampling_factor;

        if (h1 == 2 && h2 == 1 && h3 == 1 &&
            v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV420;
            fourcc = VA_FOURCC_IMC3;
        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV444;
            fourcc = VA_FOURCC_444P;
        } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV411;
            fourcc = VA_FOURCC_411P;
        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                   v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;
        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                   v1 == 2 && v2 == 2 && v3 == 2) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == 2 && h2 == 2 && h3 == 2 && /* was "h2 == 2 && h2 == 2": h1 never checked */
                   v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;
        } else
            assert(0);
    } else {
        assert(0);
    }

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    gen7_mfd_context->post_deblocking_output.bo = NULL;
    gen7_mfd_context->post_deblocking_output.valid = 0;

    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}

/* Rotation index (0..3) -> hardware rotation field. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};

/*
 * Emit the 3-dword MFX_JPEG_PIC_STATE command: chroma type derived from
 * the component sampling factors, no rotation, and the frame size in 8x8
 * blocks.  The size is rounded up to a chroma-type dependent unit
 * (8, 16 or 32 pixels) before being expressed in blocks.
 */
static void
gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    int chroma_type = GEN7_YUV420;
    int frame_width_in_blks;
    int frame_height_in_blks;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    if (pic_param->num_components == 1)
        chroma_type = GEN7_YUV400;
    else if (pic_param->num_components == 3) {
        int h1 = pic_param->components[0].h_sampling_factor;
        int h2 = pic_param->components[1].h_sampling_factor;
        int h3 = pic_param->components[2].h_sampling_factor;
        int v1 = pic_param->components[0].v_sampling_factor;
        int v2 = pic_param->components[1].v_sampling_factor;
        int v3 = pic_param->components[2].v_sampling_factor;

        if (h1 == 2 && h2 == 1 && h3 == 1 &&
            v1 == 2 && v2 == 1 && v3 == 1)
            chroma_type = GEN7_YUV420;
        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                 v1 == 1 && v2 == 1 && v3 == 1)
            chroma_type = GEN7_YUV422H_2Y;
        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                 v1 == 1 && v2 == 1 && v3 == 1)
            chroma_type = GEN7_YUV444;
        else if (h1 == 4 && h2 == 1 && h3 == 1 &&
                 v1 == 1 && v2 == 1 && v3 == 1)
            chroma_type = GEN7_YUV411;
        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                 v1 == 2 && v2 == 1 && v3 == 1)
            chroma_type = GEN7_YUV422V_2Y;
        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                 v1 == 2 && v2 == 2 && v3 == 2)
            chroma_type = GEN7_YUV422H_4Y;
        else if (h1 == 2 && h2 == 2 && h3 == 2 && /* was "h2 == 2 && h2 == 2": h1 never checked */
                 v1 == 2 && v2 == 1 && v3 == 1)
            chroma_type = GEN7_YUV422V_4Y;
        else
            assert(0);
    }

    if (chroma_type == GEN7_YUV400 ||
        chroma_type == GEN7_YUV444 ||
        chroma_type == GEN7_YUV422V_2Y) {
        frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
        frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
    } else if (chroma_type == GEN7_YUV411) {
        frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
        frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
    } else {
        frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
        frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
    }

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
    OUT_BCS_BATCH(batch,
                  (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
                  (chroma_type << 0));
    OUT_BCS_BATCH(batch,
                  ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
                  ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
    ADVANCE_BCS_BATCH(batch);
}

/* Huffman table index (0..1) -> hardware table id. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};

/*
 * Emit one 53-dword MFX_JPEG_HUFF_TABLE_STATE command for each of the
 * first num_tables Huffman tables whose load flag is set.  Does nothing
 * when no Huffman table buffer was supplied.
 *
 * NOTE(review): index is used on the two-entry va_to_gen7_jpeg_hufftable
 * table, so callers are presumably expected to pass num_tables <= 2.
 */
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        if (!huffman_table->load_huffman_table[index])
            continue;
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        /* 12 + 12 + 16 + 164 payload bytes = 51 dwords after the two above. */
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}

/* Component number (1..4; 0 is invalid) -> hardware quantiser matrix type. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};

+static void +gen8_mfd_jpeg_qm_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + VAPictureParameterBufferJPEGBaseline *pic_param; + VAIQMatrixBufferJPEGBaseline *iq_matrix; + int index; + + if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer) + return; + + iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer; + pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; + + assert(pic_param->num_components <= 3); + + for (index = 0; index < pic_param->num_components; index++) { + int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1; + int qm_type; + unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector]; + unsigned char
raster_qm[64]; + int j; + + if (id > 4 || id < 1) + continue; + + if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector]) + continue; + + qm_type = va_to_gen7_jpeg_qm[id]; + + for (j = 0; j < 64; j++) + raster_qm[zigzag_direct[j]] = qm[j]; + + gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context); + } +} + +static void +gen8_mfd_jpeg_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferJPEGBaseline *pic_param, + VASliceParameterBufferJPEGBaseline *slice_param, + VASliceParameterBufferJPEGBaseline *next_slice_param, + dri_bo *slice_data_bo, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int scan_component_mask = 0; + int i; + + assert(slice_param->num_components > 0); + assert(slice_param->num_components < 4); + assert(slice_param->num_components <= pic_param->num_components); + + for (i = 0; i < slice_param->num_components; i++) { + switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) { + case 1: + scan_component_mask |= (1 << 0); + break; + case 2: + scan_component_mask |= (1 << 1); + break; + case 3: + scan_component_mask |= (1 << 2); + break; + default: + assert(0); + break; + } + } + + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(batch, + slice_param->slice_data_size); + OUT_BCS_BATCH(batch, + slice_param->slice_data_offset); + OUT_BCS_BATCH(batch, + slice_param->slice_horizontal_position << 16 | + slice_param->slice_vertical_position << 0); + OUT_BCS_BATCH(batch, + ((slice_param->num_components != 1) << 30) | /* interleaved */ + (scan_component_mask << 27) | /* scan components */ + (0 << 26) | /* disable interrupt allowed */ + (slice_param->num_mcus << 0)); /* MCU count */ + OUT_BCS_BATCH(batch, + (slice_param->restart_interval << 0)); /* RestartInterval */ + ADVANCE_BCS_BATCH(batch); +} + +/* Workaround for JPEG decoding on Ivybridge */ 
+#ifdef JPEG_WA + +static struct { + int width; + int height; + unsigned char data[32]; + int data_size; + int data_bit_offset; + int qp; +} gen7_jpeg_wa_clip = { + 16, + 16, + { + 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c, + 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00 + }, + 14, + 40, + 28, +}; + +static void +gen8_jpeg_wa_init(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + VAStatus status; + struct object_surface *obj_surface; + + if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) + i965_DestroySurfaces(ctx, + &gen7_mfd_context->jpeg_wa_surface_id, + 1); + + status = i965_CreateSurfaces(ctx, + gen7_jpeg_wa_clip.width, + gen7_jpeg_wa_clip.height, + VA_RT_FORMAT_YUV420, + 1, + &gen7_mfd_context->jpeg_wa_surface_id); + assert(status == VA_STATUS_SUCCESS); + + obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id); + assert(obj_surface); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); + gen7_mfd_context->jpeg_wa_surface_object = obj_surface; + + if (!gen7_mfd_context->jpeg_wa_slice_data_bo) { + gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr, + "JPEG WA data", + 0x1000, + 0x1000); + dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo, + 0, + gen7_jpeg_wa_clip.data_size, + gen7_jpeg_wa_clip.data); + } +} + +static void +gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 5); + OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); + OUT_BCS_BATCH(batch, + (MFX_LONG_MODE << 17) | /* Currently only support long format */ + (MFD_MODE_VLD << 15) | /* VLD mode */ + (0 << 10) | /* disable Stream-Out */ + (0 << 9) | /* Post Deblocking Output */ + (1 << 8) | /* Pre Deblocking Output */ + (0 << 5) | /* not in stitch mode */ + (MFX_CODEC_DECODE << 4) | /* decoding mode */ + 
(MFX_FORMAT_AVC << 0)); + OUT_BCS_BATCH(batch, + (0 << 4) | /* terminate if AVC motion and POC table error occurs */ + (0 << 3) | /* terminate if AVC mbdata error occurs */ + (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ + (0 << 1) | + (0 << 0)); + OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ + OUT_BCS_BATCH(batch, 0); /* reserved */ + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_surface_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object; + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + ((obj_surface->orig_width - 1) << 18) | + ((obj_surface->orig_height - 1) << 4)); + OUT_BCS_BATCH(batch, + (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (1 << 27) | /* interleave chroma, set to 0 for JPEG */ + (0 << 22) | /* surface object control state, ignored */ + ((obj_surface->width - 1) << 3) | /* pitch */ + (0 << 2) | /* must be 0 */ + (1 << 1) | /* must be tiled */ + (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* X offset for U(Cb), must be 0 */ + (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */ + OUT_BCS_BATCH(batch, + (0 << 16) | /* X offset for V(Cr), must be 0 */ + (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */ + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object; + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + dri_bo *intra_bo; + int i; + + intra_bo = dri_bo_alloc(i965->intel.bufmgr, + "intra row store", + 128 
* 64, + 0x1000); + + BEGIN_BCS_BATCH(batch, 61); + OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2)); + OUT_BCS_RELOC(batch, + obj_surface->bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + + OUT_BCS_BATCH(batch, 0); /* post deblocking */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* uncompressed-video & stream out 7-12 */ + OUT_BCS_BATCH(batch, 0); /* ignore for decoding */ + OUT_BCS_BATCH(batch, 0); /* ignore for decoding */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 13-15 is for intra row store scratch */ + OUT_BCS_RELOC(batch, + intra_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* the DW 16-18 is for deblocking filter */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* DW 19..50 */ + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + OUT_BCS_BATCH(batch, 0); + + /* the DW52-54 is for mb status address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + /* the DW56-60 is for ILDB & second ILDB address */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); + + dri_bo_unreference(intra_bo); +} + +static void +gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + dri_bo *bsd_mpc_bo, *mpr_bo; + + bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + 11520, /* 1.5 * 120 * 64 */ + 0x1000); + + mpr_bo = dri_bo_alloc(i965->intel.bufmgr, + "mpr row store", 
+ 7680, /* 1. 0 * 120 * 64 */ + 0x1000); + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2)); + + OUT_BCS_RELOC(batch, + bsd_mpc_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_RELOC(batch, + mpr_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); + + dri_bo_unreference(bsd_mpc_bo); + dri_bo_unreference(mpr_bo); +} + +static void +gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + +} + +static void +gen8_jpeg_wa_avc_img_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int img_struct = 0; + int mbaff_frame_flag = 0; + unsigned int width_in_mbs = 1, height_in_mbs = 1; + + BEGIN_BCS_BATCH(batch, 16); + OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); + OUT_BCS_BATCH(batch, + width_in_mbs * height_in_mbs); + OUT_BCS_BATCH(batch, + ((height_in_mbs - 1) << 16) | + ((width_in_mbs - 1) << 0)); + OUT_BCS_BATCH(batch, + (0 << 24) | + (0 << 16) | + (0 << 14) | + (0 << 13) | + (0 << 12) | /* differ from GEN6 */ + (0 << 10) | + (img_struct << 8)); + OUT_BCS_BATCH(batch, + (1 << 10) | /* 4:2:0 */ + (1 << 7) | /* CABAC */ + (0 << 6) | + (0 << 5) | + (0 << 4) | + (0 << 3) | + (1 << 2) | + (mbaff_frame_flag << 1) | + (0 << 0)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx, + 
struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int i; + + BEGIN_BCS_BATCH(batch, 71); + OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); + + /* reference surfaces 0..15 */ + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + OUT_BCS_BATCH(batch, 0); /* top */ + OUT_BCS_BATCH(batch, 0); /* bottom */ + } + + OUT_BCS_BATCH(batch, 0); + + /* the current decoding frame/field */ + OUT_BCS_BATCH(batch, 0); /* top */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + /* POC List */ + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + BEGIN_BCS_BATCH(batch, 11); + OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2)); + OUT_BCS_RELOC(batch, + gen7_mfd_context->jpeg_wa_slice_data_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ + OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */ + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + + /* the input bitsteam format on GEN7 differs from GEN6 */ + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + (0 << 
31) | + (0 << 14) | + (0 << 12) | + (0 << 10) | + (0 << 8)); + OUT_BCS_BATCH(batch, + ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | + (0 << 5) | + (0 << 4) | + (1 << 3) | /* LastSlice Flag */ + (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1; + int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0; + int first_mb_in_slice = 0; + int slice_type = SLICE_TYPE_I; + + BEGIN_BCS_BATCH(batch, 11); + OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); + OUT_BCS_BATCH(batch, slice_type); + OUT_BCS_BATCH(batch, + (num_ref_idx_l1 << 24) | + (num_ref_idx_l0 << 16) | + (0 << 8) | + (0 << 0)); + OUT_BCS_BATCH(batch, + (0 << 29) | + (1 << 27) | /* disable Deblocking */ + (0 << 24) | + (gen7_jpeg_wa_clip.qp << 16) | + (0 << 8) | + (0 << 0)); + OUT_BCS_BATCH(batch, + (slice_ver_pos << 24) | + (slice_hor_pos << 16) | + (first_mb_in_slice << 0)); + OUT_BCS_BATCH(batch, + (next_slice_ver_pos << 16) | + (next_slice_hor_pos << 0)); + OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */ + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_jpeg_wa(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + gen8_jpeg_wa_init(ctx, gen7_mfd_context); + intel_batchbuffer_emit_mi_flush(batch); + gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context); + gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context); + 
gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context); + + gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context); + gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context); +} + +#endif + +void +gen8_mfd_jpeg_decode_picture(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferJPEGBaseline *pic_param; + VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param; + dri_bo *slice_data_bo; + int i, j, max_selector = 0; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer; + + /* Currently only support Baseline DCT */ + gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context); + intel_batchbuffer_start_atomic_bcs(batch, 0x1000); +#ifdef JPEG_WA + gen8_mfd_jpeg_wa(ctx, gen7_mfd_context); +#endif + intel_batchbuffer_emit_mi_flush(batch); + gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context); + gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context); + gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context); + gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context); + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer; + slice_data_bo = decode_state->slice_datas[j]->bo; + gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context); + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + 
next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + int component; + + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + for (component = 0; component < slice_param->num_components; component++) { + if (max_selector < slice_param->components[component].dc_table_selector) + max_selector = slice_param->components[component].dc_table_selector; + + if (max_selector < slice_param->components[component].ac_table_selector) + max_selector = slice_param->components[component].ac_table_selector; + } + + slice_param++; + } + } + + assert(max_selector < 2); + gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1); + + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer; + slice_data_bo = decode_state->slice_datas[j]->bo; + gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context); + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL); + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context); + slice_param++; + } + } + + intel_batchbuffer_end_atomic(batch); + 
intel_batchbuffer_flush(batch); +} + +static const int vp8_dc_qlookup[128] = +{ + 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, + 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, + 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157, +}; + +static const int vp8_ac_qlookup[128] = +{ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, + 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, + 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, + 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, + 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, +}; + +static inline unsigned int vp8_clip_quantization_index(int index) +{ + if(index > 127) + return 127; + else if(index <0) + return 0; + + return index; +} + +static void +gen8_mfd_vp8_decode_init(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct object_surface *obj_surface; + struct i965_driver_data *i965 = i965_driver_data(ctx); + dri_bo *bo; + VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; + int width_in_mbs = (pic_param->frame_width + 15) / 16; + int height_in_mbs = (pic_param->frame_height + 15) / 16; + + assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ + assert(height_in_mbs > 0 && height_in_mbs 
<= 256); + + intel_update_vp8_frame_store_index(ctx, + decode_state, + pic_param, + gen7_mfd_context->reference_surface); + + /* Current decoded picture */ + obj_surface = decode_state->render_object; + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); + + dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable; + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable; + + intel_ensure_vp8_segmentation_buffer(ctx, + &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs); + + /* The same as AVC */ + dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "intra row store", + width_in_mbs * 64, + 0x1000); + assert(bo); + gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "deblocking filter row store", + width_in_mbs * 64 * 4, + 0x1000); + assert(bo); + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1; + + dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "bsd mpc row store", + width_in_mbs * 64 * 2, + 0x1000); + assert(bo); + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid 
= 1; + + dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "mpr row store", + width_in_mbs * 64 * 2, + 0x1000); + assert(bo); + gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo; + gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1; + + gen7_mfd_context->bitplane_read_buffer.valid = 0; +} + +static void +gen8_mfd_vp8_pic_state(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; + VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer; + VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */ + dri_bo *probs_bo = decode_state->probability_data->bo; + int i, j,log2num; + unsigned int quantization_value[4][6]; + + /* There is no safe way to error out if the segmentation buffer + could not be allocated. 
So, instead of aborting, simply decode + something even if the result may look totally inacurate */ + const unsigned int enable_segmentation = + pic_param->pic_fields.bits.segmentation_enabled && + gen7_mfd_context->segmentation_buffer.valid; + + log2num = (int)log2(slice_param->num_of_partitions - 1); + + BEGIN_BCS_BATCH(batch, 38); + OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2)); + OUT_BCS_BATCH(batch, + (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 | + (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0); + OUT_BCS_BATCH(batch, + log2num << 24 | + pic_param->pic_fields.bits.sharpness_level << 16 | + pic_param->pic_fields.bits.sign_bias_alternate << 13 | + pic_param->pic_fields.bits.sign_bias_golden << 12 | + pic_param->pic_fields.bits.loop_filter_adj_enable << 11 | + pic_param->pic_fields.bits.mb_no_coeff_skip << 10 | + pic_param->pic_fields.bits.update_mb_segmentation_map << 9 | + pic_param->pic_fields.bits.segmentation_enabled << 8 | + (enable_segmentation && + !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 | + (enable_segmentation && + pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 | + (pic_param->pic_fields.bits.key_frame == 0 ? 
1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/ + pic_param->pic_fields.bits.filter_type << 4 | + (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */ + !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */ + + OUT_BCS_BATCH(batch, + pic_param->loop_filter_level[3] << 24 | + pic_param->loop_filter_level[2] << 16 | + pic_param->loop_filter_level[1] << 8 | + pic_param->loop_filter_level[0] << 0); + + /* Quantizer Value for 4 segmetns, DW4-DW15 */ + for (i = 0; i < 4; i++) { + quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/ + quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/ + quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/ + /* 101581>>16 is equivalent to 155/100 */ + quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/ + quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/ + quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/ + + quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8); + quantization_value[i][4] = (quantization_value[i][4] < 132 ? 
quantization_value[i][4] : 132); + + OUT_BCS_BATCH(batch, + quantization_value[i][0] << 16 | /* Y1AC */ + quantization_value[i][1] << 0); /* Y1DC */ + OUT_BCS_BATCH(batch, + quantization_value[i][5] << 16 | /* UVAC */ + quantization_value[i][4] << 0); /* UVDC */ + OUT_BCS_BATCH(batch, + quantization_value[i][3] << 16 | /* Y2AC */ + quantization_value[i][2] << 0); /* Y2DC */ + } + + /* CoeffProbability table for non-key frame, DW16-DW18 */ + if (probs_bo) { + OUT_BCS_RELOC(batch, probs_bo, + 0, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + + OUT_BCS_BATCH(batch, + pic_param->mb_segment_tree_probs[2] << 16 | + pic_param->mb_segment_tree_probs[1] << 8 | + pic_param->mb_segment_tree_probs[0] << 0); + + OUT_BCS_BATCH(batch, + pic_param->prob_skip_false << 24 | + pic_param->prob_intra << 16 | + pic_param->prob_last << 8 | + pic_param->prob_gf << 0); + + OUT_BCS_BATCH(batch, + pic_param->y_mode_probs[3] << 24 | + pic_param->y_mode_probs[2] << 16 | + pic_param->y_mode_probs[1] << 8 | + pic_param->y_mode_probs[0] << 0); + + OUT_BCS_BATCH(batch, + pic_param->uv_mode_probs[2] << 16 | + pic_param->uv_mode_probs[1] << 8 | + pic_param->uv_mode_probs[0] << 0); + + /* MV update value, DW23-DW32 */ + for (i = 0; i < 2; i++) { + for (j = 0; j < 20; j += 4) { + OUT_BCS_BATCH(batch, + (j + 3 == 19 ? 
0 : pic_param->mv_probs[i][j + 3]) << 24 | + pic_param->mv_probs[i][j + 2] << 16 | + pic_param->mv_probs[i][j + 1] << 8 | + pic_param->mv_probs[i][j + 0] << 0); + } + } + + OUT_BCS_BATCH(batch, + (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 | + (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 | + (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 | + (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0); + + OUT_BCS_BATCH(batch, + (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 | + (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 | + (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 | + (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0); + + /* segmentation id stream base address, DW35-DW37 */ + if (enable_segmentation) { + OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo, + 0, I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + ADVANCE_BCS_BATCH(batch); +} + +static void +gen8_mfd_vp8_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferVP8 *pic_param, + VASliceParameterBufferVP8 *slice_param, + dri_bo *slice_data_bo, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + int i, log2num; + unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3); + unsigned int used_bits = 8-pic_param->bool_coder_ctx.count; + unsigned int partition_size_0 = slice_param->partition_size[0]; + + assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7); + if (used_bits == 8) { + used_bits = 0; + offset += 1; + partition_size_0 -= 1; + } + + assert(slice_param->num_of_partitions >= 2); + assert(slice_param->num_of_partitions <= 9); + + log2num = (int)log2(slice_param->num_of_partitions - 1); + + BEGIN_BCS_BATCH(batch, 22); + OUT_BCS_BATCH(batch, 
MFD_VP8_BSD_OBJECT | (22 - 2)); + OUT_BCS_BATCH(batch, + used_bits << 16 | /* Partition 0 CPBAC Entropy Count */ + pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */ + log2num << 4 | + (slice_param->macroblock_offset & 0x7)); + OUT_BCS_BATCH(batch, + pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */ + 0); + + OUT_BCS_BATCH(batch, partition_size_0); + OUT_BCS_BATCH(batch, offset); + //partion sizes in bytes are present after the above first partition when there are more than one token partition + offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2)); + for (i = 1; i < 9; i++) { + if (i < slice_param->num_of_partitions) { + OUT_BCS_BATCH(batch, slice_param->partition_size[i]); + OUT_BCS_BATCH(batch, offset); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } + + offset += slice_param->partition_size[i]; + } + + OUT_BCS_BATCH(batch, + 1 << 31 | /* concealment method */ + 0); + + ADVANCE_BCS_BATCH(batch); +} + +void +gen8_mfd_vp8_decode_picture(VADriverContextP ctx, + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) +{ + struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; + VAPictureParameterBufferVP8 *pic_param; + VASliceParameterBufferVP8 *slice_param; + dri_bo *slice_data_bo; + + assert(decode_state->pic_param && decode_state->pic_param->buffer); + pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; + + /* one slice per frame */ + if (decode_state->num_slice_params != 1 || + (!decode_state->slice_params || + !decode_state->slice_params[0] || + (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) || + (!decode_state->slice_datas || + !decode_state->slice_datas[0] || + !decode_state->slice_datas[0]->bo) || + !decode_state->probability_data) { + WARN_ONCE("Wrong parameters for VP8 decoding\n"); + + return; + } + + slice_param = (VASliceParameterBufferVP8 
*)decode_state->slice_params[0]->buffer; + slice_data_bo = decode_state->slice_datas[0]->bo; + + gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context); + intel_batchbuffer_start_atomic_bcs(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context); + gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context); + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); +} + +static VAStatus +gen8_mfd_decode_picture(VADriverContextP ctx, + VAProfile profile, + union codec_state *codec_state, + struct hw_context *hw_context) + +{ + struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context; + struct decode_state *decode_state = &codec_state->decode; + VAStatus vaStatus; + + assert(gen7_mfd_context); + + vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state); + + if (vaStatus != VA_STATUS_SUCCESS) + goto out; + + gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1; + + switch (profile) { + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context); + break; + + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: + gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context); + break; + + case VAProfileVC1Simple: + case VAProfileVC1Main: + case VAProfileVC1Advanced: + gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context); 
+ break; + + case VAProfileJPEGBaseline: + gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context); + break; + + case VAProfileVP8Version0_3: + gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context); + break; + + default: + assert(0); + break; + } + + vaStatus = VA_STATUS_SUCCESS; + +out: + return vaStatus; +} + +static void +gen8_mfd_context_destroy(void *hw_context) +{ + struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context; + + dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); + gen7_mfd_context->post_deblocking_output.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo); + gen7_mfd_context->pre_deblocking_output.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo); + gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo); + gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo); + gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo); + gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo); + gen7_mfd_context->bitplane_read_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo); + gen7_mfd_context->segmentation_buffer.bo = NULL; + + dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo); + + intel_batchbuffer_free(gen7_mfd_context->base.batch); + free(gen7_mfd_context); +} + +static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx, + struct gen7_mfd_context *gen7_mfd_context) +{ + gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1; + gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1; + 
gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1; + gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1; +} + +struct hw_context * +gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) +{ + struct intel_driver_data *intel = intel_driver_data(ctx); + struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context)); + int i; + + gen7_mfd_context->base.destroy = gen8_mfd_context_destroy; + gen7_mfd_context->base.run = gen8_mfd_decode_picture; + gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0); + + for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) { + gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID; + gen7_mfd_context->reference_surface[i].frame_store_id = -1; + } + + gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE; + gen7_mfd_context->segmentation_buffer.valid = 0; + + switch (obj_config->profile) { + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context); + break; + + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: + gen8_mfd_avc_context_init(ctx, gen7_mfd_context); + break; + default: + break; + } + return (struct hw_context *)gen7_mfd_context; +} diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c new file mode 100644 index 0000000..1e6068d --- /dev/null +++ b/src/gen8_post_processing.c @@ -0,0 +1,1466 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * 
permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Xiang Haihao <haihao.xiang@intel.com> + * Zhao Yakui <yakui.zhao@intel.com> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "intel_batchbuffer.h" +#include "intel_driver.h" +#include "i965_defines.h" +#include "i965_structs.h" +#include "i965_drv_video.h" +#include "i965_post_processing.h" +#include "i965_render.h" +#include "intel_media.h" + +#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 + +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES) + +#define GPU_ASM_BLOCK_WIDTH 16 +#define GPU_ASM_BLOCK_HEIGHT 8 +#define GPU_ASM_X_OFFSET_ALIGNMENT 4 + +#define VA_STATUS_SUCCESS_1 0xFFFFFFFE + +static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param); + +static VAStatus gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const 
VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param); + +/* TODO: Modify the shader and then compile it again. + * Currently it is derived from Haswell*/ +static const uint32_t pp_null_gen8[][4] = { +}; + +static const uint32_t pp_nv12_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" +}; + +static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pl3.g8b" +}; + +static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen8/pl3_to_pl2.g8b" +}; + +static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = { +#include "shaders/post_processing/gen8/pl3_to_pl3.g8b" +}; + +static const uint32_t pp_nv12_scaling_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" +}; + +static const uint32_t pp_nv12_avs_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pl2.g8b" +}; + +static const uint32_t pp_nv12_dndi_gen8[][4] = { +// #include "shaders/post_processing/gen7/dndi.g75b" +}; + +static const uint32_t pp_nv12_dn_gen8[][4] = { +// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b" +}; +static const uint32_t pp_nv12_load_save_pa_gen8[][4] = { +#include "shaders/post_processing/gen8/pl2_to_pa.g8b" +}; +static const uint32_t pp_pl3_load_save_pa_gen8[][4] = { +#include "shaders/post_processing/gen8/pl3_to_pa.g8b" +}; +static const uint32_t pp_pa_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen8/pa_to_pl2.g8b" +}; +static const uint32_t pp_pa_load_save_pl3_gen8[][4] = { +#include "shaders/post_processing/gen8/pa_to_pl3.g8b" +}; +static const uint32_t pp_pa_load_save_pa_gen8[][4] = { +#include "shaders/post_processing/gen8/pa_to_pa.g8b" +}; +static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = { +#include "shaders/post_processing/gen8/rgbx_to_nv12.g8b" +}; +static const uint32_t pp_nv12_load_save_rgbx_gen8[][4] = { +#include 
"shaders/post_processing/gen8/pl2_to_rgbx.g8b" +}; + +static struct pp_module pp_modules_gen8[] = { + { + { + "NULL module (for testing)", + PP_NULL, + pp_null_gen8, + sizeof(pp_null_gen8), + NULL, + }, + + pp_null_initialize, + }, + + { + { + "NV12_NV12", + PP_NV12_LOAD_SAVE_N12, + pp_nv12_load_save_nv12_gen8, + sizeof(pp_nv12_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12_PL3", + PP_NV12_LOAD_SAVE_PL3, + pp_nv12_load_save_pl3_gen8, + sizeof(pp_nv12_load_save_pl3_gen8), + NULL, + }, + gen8_pp_plx_avs_initialize, + }, + + { + { + "PL3_NV12", + PP_PL3_LOAD_SAVE_N12, + pp_pl3_load_save_nv12_gen8, + sizeof(pp_pl3_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PL3_PL3", + PP_PL3_LOAD_SAVE_N12, + pp_pl3_load_save_pl3_gen8, + sizeof(pp_pl3_load_save_pl3_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 Scaling module", + PP_NV12_SCALING, + pp_nv12_scaling_gen8, + sizeof(pp_nv12_scaling_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 AVS module", + PP_NV12_AVS, + pp_nv12_avs_gen8, + sizeof(pp_nv12_avs_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12 DNDI module", + PP_NV12_DNDI, + pp_nv12_dndi_gen8, + sizeof(pp_nv12_dndi_gen8), + NULL, + }, + + pp_null_initialize, + }, + + { + { + "NV12 DN module", + PP_NV12_DN, + pp_nv12_dn_gen8, + sizeof(pp_nv12_dn_gen8), + NULL, + }, + + pp_null_initialize, + }, + { + { + "NV12_PA module", + PP_NV12_LOAD_SAVE_PA, + pp_nv12_load_save_pa_gen8, + sizeof(pp_nv12_load_save_pa_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PL3_PA module", + PP_PL3_LOAD_SAVE_PA, + pp_pl3_load_save_pa_gen8, + sizeof(pp_pl3_load_save_pa_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PA_NV12 module", + PP_PA_LOAD_SAVE_NV12, + pp_pa_load_save_nv12_gen8, + sizeof(pp_pa_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PA_PL3 
module", + PP_PA_LOAD_SAVE_PL3, + pp_pa_load_save_pl3_gen8, + sizeof(pp_pa_load_save_pl3_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen8, + sizeof(pp_pa_load_save_pa_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "RGBX_NV12 module", + PP_RGBX_LOAD_SAVE_NV12, + pp_rgbx_load_save_nv12_gen8, + sizeof(pp_rgbx_load_save_nv12_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, + + { + { + "NV12_RGBX module", + PP_NV12_LOAD_SAVE_RGBX, + pp_nv12_load_save_rgbx_gen8, + sizeof(pp_nv12_load_save_rgbx_gen8), + NULL, + }, + + gen8_pp_plx_avs_initialize, + }, +}; + +static int +pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface) +{ + int fourcc; + + if (surface->type == I965_SURFACE_TYPE_IMAGE) { + struct object_image *obj_image = (struct object_image *)surface->base; + fourcc = obj_image->image.format.fourcc; + } else { + struct object_surface *obj_surface = (struct object_surface *)surface->base; + fourcc = obj_surface->fourcc; + } + + return fourcc; +} + +static void +gen8_pp_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +static void +gen8_pp_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss2.tiled_surface = 0; + ss->ss2.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + + +static void 
+gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + dri_bo *surf_bo, unsigned long surf_bo_offset, + int width, int height, int pitch, int format, + int index, int is_target) +{ + struct gen8_surface_state *ss; + dri_bo *ss_bo; + unsigned int tiling; + unsigned int swizzle; + + dri_bo_get_tiling(surf_bo, &tiling, &swizzle); + ss_bo = pp_context->surface_state_binding_table.bo; + assert(ss_bo); + + dri_bo_map(ss_bo, True); + assert(ss_bo->virtual); + ss = (struct gen8_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index)); + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = format; + ss->ss8.base_addr = surf_bo->offset + surf_bo_offset; + ss->ss2.width = width - 1; + ss->ss2.height = height - 1; + ss->ss3.pitch = pitch - 1; + + /* Always set 1(align 4 mode) per B-spec */ + ss->ss0.vertical_alignment = 1; + ss->ss0.horizontal_alignment = 1; + + gen8_pp_set_surface_tiling(ss, tiling); + gen8_render_set_surface_scs(ss); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_RENDER, is_target ? 
I915_GEM_DOMAIN_RENDER : 0, + surf_bo_offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), + surf_bo); + ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss_bo); +} + + +static void +gen8_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + dri_bo *surf_bo, unsigned long surf_bo_offset, + int width, int height, int wpitch, + int xoffset, int yoffset, + int format, int interleave_chroma, + int index) +{ + struct gen8_surface_state2 *ss2; + dri_bo *ss2_bo; + unsigned int tiling; + unsigned int swizzle; + + dri_bo_get_tiling(surf_bo, &tiling, &swizzle); + ss2_bo = pp_context->surface_state_binding_table.bo; + assert(ss2_bo); + + dri_bo_map(ss2_bo, True); + assert(ss2_bo->virtual); + ss2 = (struct gen8_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index)); + memset(ss2, 0, sizeof(*ss2)); + ss2->ss6.base_addr = surf_bo->offset + surf_bo_offset; + ss2->ss1.cbcr_pixel_offset_v_direction = 0; + ss2->ss1.width = width - 1; + ss2->ss1.height = height - 1; + ss2->ss2.pitch = wpitch - 1; + ss2->ss2.interleave_chroma = interleave_chroma; + ss2->ss2.surface_format = format; + ss2->ss3.x_offset_for_cb = xoffset; + ss2->ss3.y_offset_for_cb = yoffset; + gen8_pp_set_surface2_tiling(ss2, tiling); + dri_bo_emit_reloc(ss2_bo, + I915_GEM_DOMAIN_RENDER, 0, + surf_bo_offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state2, ss6), + surf_bo); + ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss2_bo); +} + +static void +gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *surface, + int base_index, int is_target, + const VARectangle *rect, + int *width, int *height, int *pitch, int *offset) +{ + struct object_surface *obj_surface; + struct object_image 
*obj_image; + dri_bo *bo; + int fourcc = pp_get_surface_fourcc(ctx, surface); + const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc); + + if (fourcc_info == NULL) + return; + + if (surface->type == I965_SURFACE_TYPE_SURFACE) { + obj_surface = (struct object_surface *)surface->base; + bo = obj_surface->bo; + width[0] = MIN(rect->x + rect->width, obj_surface->orig_width); + height[0] = MIN(rect->y + rect->height, obj_surface->orig_height); + pitch[0] = obj_surface->width; + offset[0] = 0; + + if (fourcc_info->num_planes == 1 && is_target) + width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ + + width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width); + height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height); + pitch[1] = obj_surface->cb_cr_pitch; + offset[1] = obj_surface->y_cb_offset * obj_surface->width; + + width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width); + height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height); + pitch[2] = obj_surface->cb_cr_pitch; + offset[2] = obj_surface->y_cr_offset * obj_surface->width; + } else { + int U = 0, V = 0; + + /* FIXME: add support for ARGB/ABGR image */ + obj_image = (struct object_image *)surface->base; + bo = obj_image->bo; + width[0] = MIN(rect->x + rect->width, obj_image->image.width); + height[0] = MIN(rect->y + rect->height, obj_image->image.height); + pitch[0] = obj_image->image.pitches[0]; + offset[0] = obj_image->image.offsets[0]; + + if (fourcc_info->num_planes == 1) { + if (is_target) + width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ + } else if (fourcc_info->num_planes == 2) { + U = 1, V = 1; + } else { + assert(fourcc_info->num_components == 3); + + U = fourcc_info->components[1].plane; + V = fourcc_info->components[2].plane; + 
assert((U == 1 && V == 2) || + (U == 2 && V == 1)); + } + + /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */ + width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor); + height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor); + pitch[1] = obj_image->image.pitches[U]; + offset[1] = obj_image->image.offsets[U]; + + width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor); + height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor); + pitch[2] = obj_image->image.pitches[V]; + offset[2] = obj_image->image.offsets[V]; + } + + if (is_target) { + gen8_pp_set_surface_state(ctx, pp_context, + bo, 0, + width[0] / 4, height[0], pitch[0], + I965_SURFACEFORMAT_R8_UINT, + base_index, 1); + + if (fourcc_info->num_planes == 2) { + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 2, height[1], pitch[1], + I965_SURFACEFORMAT_R8G8_SINT, + base_index + 1, 1); + } else if (fourcc_info->num_planes == 3) { + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 4, height[1], pitch[1], + I965_SURFACEFORMAT_R8_SINT, + base_index + 1, 1); + gen8_pp_set_surface_state(ctx, pp_context, + bo, offset[2], + width[2] / 4, height[2], pitch[2], + I965_SURFACEFORMAT_R8_SINT, + base_index + 2, 1); + } + + if (fourcc_info->format == I965_COLOR_RGB) { + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + /* the format is MSB: X-B-G-R */ + pp_static_parameter->grf2.save_avs_rgb_swap = 0; + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { + /* It is stored as MSB: X-R-G-B */ + pp_static_parameter->grf2.save_avs_rgb_swap = 1; + } + } + } else { + int format0 = 
SURFACE_FORMAT_Y8_UNORM; + + switch (fourcc) { + case VA_FOURCC_YUY2: + format0 = SURFACE_FORMAT_YCRCB_NORMAL; + break; + + case VA_FOURCC_UYVY: + format0 = SURFACE_FORMAT_YCRCB_SWAPY; + break; + + default: + break; + } + + if (fourcc_info->format == I965_COLOR_RGB) { + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */ + format0 = SURFACE_FORMAT_R8G8B8A8_UNORM; + pp_static_parameter->grf2.src_avs_rgb_swap = 0; + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { + pp_static_parameter->grf2.src_avs_rgb_swap = 1; + } + } + + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[0], + width[0], height[0], pitch[0], + 0, 0, + format0, 0, + base_index); + + if (fourcc_info->num_planes == 2) { + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8B8_UNORM, 0, + base_index + 1); + } else if (fourcc_info->num_planes == 3) { + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 1); + gen8_pp_set_surface2_state(ctx, pp_context, + bo, offset[2], + width[2], height[2], pitch[2], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 2); + } + } +} + +static int +pp_null_x_steps(void *private_context) +{ + return 1; +} + +static int +pp_null_y_steps(void *private_context) +{ + return 1; +} + +static int +pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) +{ + return 0; +} + +static VAStatus +pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param) +{ + /* private function & data */ + pp_context->pp_x_steps = pp_null_x_steps; + pp_context->pp_y_steps = pp_null_y_steps; + 
pp_context->private_context = NULL; + pp_context->pp_set_block_parameter = pp_null_set_block_parameter; + + dst_surface->flags = src_surface->flags; + + return VA_STATUS_SUCCESS; +} + +static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect) +{ + int i, dst_width_adjust; + /* x offset of dest surface must be dword aligned. + * so we have to extend dst surface on left edge, and mask out pixels not interested + */ + if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) { + pp_context->block_horizontal_mask_left = 0; + for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++) + { + pp_context->block_horizontal_mask_left |= 1<<i; + } + } + else { + pp_context->block_horizontal_mask_left = 0xffff; + } + + dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; + if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){ + pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1; + } + else { + pp_context->block_horizontal_mask_right = 0xffff; + } + + if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){ + pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1; + } + else { + pp_context->block_vertical_mask_bottom = 0xff; + } + +} + +static int +gen7_pp_avs_x_steps(void *private_context) +{ + struct pp_avs_context *pp_avs_context = private_context; + + return pp_avs_context->dest_w / 16; +} + +static int +gen7_pp_avs_y_steps(void *private_context) +{ + struct pp_avs_context *pp_avs_context = private_context; + + return pp_avs_context->dest_h / 16; +} + +static int +gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) +{ + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context; + struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; + + pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + 
pp_avs_context->dest_x; + pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y; + pp_inline_parameter->grf7.constant_0 = 0xffffffff; + pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w; + + return 0; +} + +static void gen7_update_src_surface_uv_offset(VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + const struct i965_surface *surface) +{ + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + int fourcc = pp_get_surface_fourcc(ctx, surface); + + if (fourcc == VA_FOURCC_YUY2) { + pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0; + pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1; + pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3; + } else if (fourcc == VA_FOURCC_UYVY) { + pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1; + pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0; + pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2; + } +} + +static VAStatus +gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + void *filter_param) +{ +/* TODO: Add the sampler_8x8 state */ + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context; + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + struct gen8_sampler_8x8_avs *sampler_8x8; + struct i965_sampler_8x8_coefficient *sampler_8x8_state; + int i; + int width[3], height[3], pitch[3], offset[3]; + int src_width, src_height; + unsigned char *cc_ptr; + + memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter)); + + /* source surface */ + 
gen8_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, + src_rect, + width, height, pitch, offset); + src_height = height[0]; + src_width = width[0]; + + /* destination surface */ + gen8_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, + dst_rect, + width, height, pitch, offset); + + /* sampler 8x8 state */ + dri_bo_map(pp_context->dynamic_state.bo, True); + assert(pp_context->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) pp_context->dynamic_state.bo->virtual + + pp_context->sampler_offset; + /* Currently only one gen8 sampler_8x8 is initialized */ + sampler_8x8 = (struct gen8_sampler_8x8_avs *) cc_ptr; + memset(sampler_8x8, 0, sizeof(*sampler_8x8)); + + sampler_8x8->dw0.gain_factor = 44; + sampler_8x8->dw0.weak_edge_threshold = 1; + sampler_8x8->dw0.strong_edge_threshold = 8; + /* Use the value like that on Ivy instead of default + * sampler_8x8->dw0.r3x_coefficient = 5; + */ + sampler_8x8->dw0.r3x_coefficient = 27; + sampler_8x8->dw0.r3c_coefficient = 5; + + sampler_8x8->dw2.global_noise_estimation = 255; + sampler_8x8->dw2.non_edge_weight = 1; + sampler_8x8->dw2.regular_weight = 2; + sampler_8x8->dw2.strong_edge_weight = 7; + /* Use the value like that on Ivy instead of default + * sampler_8x8->dw2.r5x_coefficient = 7; + * sampler_8x8->dw2.r5cx_coefficient = 7; + * sampler_8x8->dw2.r5c_coefficient = 7; + */ + sampler_8x8->dw2.r5x_coefficient = 9; + sampler_8x8->dw2.r5cx_coefficient = 8; + sampler_8x8->dw2.r5c_coefficient = 3; + + sampler_8x8->dw3.sin_alpha = 101; /* sin_alpha = 0 */ + sampler_8x8->dw3.cos_alpha = 79; /* cos_alpha = 0 */ + sampler_8x8->dw3.sat_max = 0x1f; + sampler_8x8->dw3.hue_max = 14; + /* The 8tap filter will determine whether the adaptive Filter is + * applied for all channels(dw153). + * If the 8tap filter is disabled, the adaptive filter should be disabled. + * Only when 8tap filter is enabled, it can be enabled or not. 
+ */ + sampler_8x8->dw3.enable_8tap_filter = 3; + sampler_8x8->dw3.ief4_smooth_enable = 0; + + sampler_8x8->dw4.s3u = 0; + sampler_8x8->dw4.diamond_margin = 4; + sampler_8x8->dw4.vy_std_enable = 0; + sampler_8x8->dw4.umid = 110; + sampler_8x8->dw4.vmid = 154; + + sampler_8x8->dw5.diamond_dv = 0; + sampler_8x8->dw5.diamond_th = 35; + sampler_8x8->dw5.diamond_alpha = 100; /* diamond_alpha = 0 */ + sampler_8x8->dw5.hs_margin = 3; + sampler_8x8->dw5.diamond_du = 2; + + sampler_8x8->dw6.y_point1 = 46; + sampler_8x8->dw6.y_point2 = 47; + sampler_8x8->dw6.y_point3 = 254; + sampler_8x8->dw6.y_point4 = 255; + + sampler_8x8->dw7.inv_margin_vyl = 3300; /* inv_margin_vyl = 0 */ + + sampler_8x8->dw8.inv_margin_vyu = 1600; /* inv_margin_vyu = 0 */ + sampler_8x8->dw8.p0l = 46; + sampler_8x8->dw8.p1l = 216; + + sampler_8x8->dw9.p2l = 236; + sampler_8x8->dw9.p3l = 236; + sampler_8x8->dw9.b0l = 133; + sampler_8x8->dw9.b1l = 130; + + sampler_8x8->dw10.b2l = 130; + sampler_8x8->dw10.b3l = 130; + /* s0l = -5 / 256. 
s2.8 */ + sampler_8x8->dw10.s0l = 1029; /* s0l = 0 */ + sampler_8x8->dw10.y_slope2 = 31; /* y_slop2 = 0 */ + + sampler_8x8->dw11.s1l = 0; + sampler_8x8->dw11.s2l = 0; + + sampler_8x8->dw12.s3l = 0; + sampler_8x8->dw12.p0u = 46; + sampler_8x8->dw12.p1u = 66; + sampler_8x8->dw12.y_slope1 = 31; /* y_slope1 = 0 */ + + sampler_8x8->dw13.p2u = 130; + sampler_8x8->dw13.p3u = 236; + sampler_8x8->dw13.b0u = 143; + sampler_8x8->dw13.b1u = 163; + + sampler_8x8->dw14.b2u = 200; + sampler_8x8->dw14.b3u = 140; + sampler_8x8->dw14.s0u = 256; /* s0u = 0 */ + + sampler_8x8->dw15.s1u = 113; /* s1u = 0 */ + sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */ + + sampler_8x8_state = sampler_8x8->coefficients; + + for (i = 0; i < 17; i++) { + float coff; + coff = i; + coff = coff / 16; + + memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); + /* for Y channel, currently ignore */ + sampler_8x8_state->dw0.table_0x_filter_c0 = 0x0; + sampler_8x8_state->dw0.table_0x_filter_c1 = 0x0; + sampler_8x8_state->dw0.table_0x_filter_c2 = 0x0; + sampler_8x8_state->dw0.table_0x_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw1.table_0x_filter_c4 = + intel_format_convert(coff, 1, 6, 0); + sampler_8x8_state->dw1.table_0x_filter_c5 = 0x0; + sampler_8x8_state->dw1.table_0x_filter_c6 = 0x0; + sampler_8x8_state->dw1.table_0x_filter_c7 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c0 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c1 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c2 = 0x0; + sampler_8x8_state->dw2.table_0y_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw3.table_0y_filter_c4 = + intel_format_convert(coff, 1, 6, 0); + sampler_8x8_state->dw3.table_0y_filter_c5 = 0x0; + sampler_8x8_state->dw3.table_0y_filter_c6 = 0x0; + sampler_8x8_state->dw3.table_0y_filter_c7 = 0x0; + /* for U/V channel, 0.25 */ + sampler_8x8_state->dw4.table_1x_filter_c0 = 0x0; + sampler_8x8_state->dw4.table_1x_filter_c1 = 0x0; + 
sampler_8x8_state->dw4.table_1x_filter_c2 = 0x0; + sampler_8x8_state->dw4.table_1x_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw5.table_1x_filter_c4 = + intel_format_convert(coff, 1, 6, 0); + sampler_8x8_state->dw5.table_1x_filter_c5 = 0x00; + sampler_8x8_state->dw5.table_1x_filter_c6 = 0x0; + sampler_8x8_state->dw5.table_1x_filter_c7 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c0 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c1 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c2 = 0x0; + sampler_8x8_state->dw6.table_1y_filter_c3 = + intel_format_convert(1 - coff, 1, 6, 0); + sampler_8x8_state->dw7.table_1y_filter_c4 = + intel_format_convert(coff, 1, 6,0); + sampler_8x8_state->dw7.table_1y_filter_c5 = 0x0; + sampler_8x8_state->dw7.table_1y_filter_c6 = 0x0; + sampler_8x8_state->dw7.table_1y_filter_c7 = 0x0; + sampler_8x8_state++; + } + + sampler_8x8->dw152.default_sharpness_level = 0; + sampler_8x8->dw153.adaptive_filter_for_all_channel = 1; + sampler_8x8->dw153.bypass_y_adaptive_filtering = 1; + sampler_8x8->dw153.bypass_x_adaptive_filtering = 1; + + dri_bo_unmap(pp_context->dynamic_state.bo); + + + /* private function & data */ + pp_context->pp_x_steps = gen7_pp_avs_x_steps; + pp_context->pp_y_steps = gen7_pp_avs_y_steps; + pp_context->private_context = &pp_context->pp_avs_context; + pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter; + + pp_avs_context->dest_x = dst_rect->x; + pp_avs_context->dest_y = dst_rect->y; + pp_avs_context->dest_w = ALIGN(dst_rect->width, 16); + pp_avs_context->dest_h = ALIGN(dst_rect->height, 16); + pp_avs_context->src_w = src_rect->width; + pp_avs_context->src_h = src_rect->height; + pp_avs_context->horiz_range = (float)src_rect->width / src_width; + + int dw = (pp_avs_context->src_w - 1) / 16 + 1; + dw = MAX(dw, dst_rect->width); + + pp_static_parameter->grf1.pointer_to_inline_parameter = 7; + pp_static_parameter->grf2.avs_wa_enable = 0; /* It is not required on GEN8+ */ + 
pp_static_parameter->grf2.alpha = 255; + + pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; + pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; + pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - + (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; + pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width - + (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw; + + gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface); + + dst_surface->flags = src_surface->flags; + + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_pp_initialize( + VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + int pp_index, + void * filter_param +) +{ + VAStatus va_status; + struct i965_driver_data *i965 = i965_driver_data(ctx); + dri_bo *bo; + int bo_size; + unsigned int end_offset; + struct pp_module *pp_module; + int static_param_size, inline_param_size; + + dri_bo_unreference(pp_context->surface_state_binding_table.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES, + 4096); + assert(bo); + pp_context->surface_state_binding_table.bo = bo; + + pp_context->idrt.num_interface_descriptors = 0; + + pp_context->sampler_size = 2 * 4096; + + bo_size = 4096 + pp_context->curbe_size + pp_context->sampler_size + + pp_context->idrt_size; + + dri_bo_unreference(pp_context->dynamic_state.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "dynamic_state", + bo_size, + 4096); + + assert(bo); + pp_context->dynamic_state.bo = bo; + pp_context->dynamic_state.bo_size = 
bo_size; + + end_offset = 0; + pp_context->dynamic_state.end_offset = 0; + + /* Constant buffer offset */ + pp_context->curbe_offset = ALIGN(end_offset, 64); + end_offset = pp_context->curbe_offset + pp_context->curbe_size; + + /* Interface descriptor offset */ + pp_context->idrt_offset = ALIGN(end_offset, 64); + end_offset = pp_context->idrt_offset + pp_context->idrt_size; + + /* Sampler state offset */ + pp_context->sampler_offset = ALIGN(end_offset, 64); + end_offset = pp_context->sampler_offset + pp_context->sampler_size; + + /* update the end offset of dynamic_state */ + pp_context->dynamic_state.end_offset = ALIGN(end_offset, 64); + + static_param_size = sizeof(struct gen7_pp_static_parameter); + inline_param_size = sizeof(struct gen7_pp_inline_parameter); + + memset(pp_context->pp_static_parameter, 0, static_param_size); + memset(pp_context->pp_inline_parameter, 0, inline_param_size); + + assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES); + pp_context->current_pp = pp_index; + pp_module = &pp_context->pp_modules[pp_index]; + + if (pp_module->initialize) + va_status = pp_module->initialize(ctx, pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + filter_param); + else + va_status = VA_STATUS_ERROR_UNIMPLEMENTED; + + calculate_boundary_block_mask(pp_context, dst_rect); + + return va_status; +} + +static void +gen8_pp_interface_descriptor_table(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct gen8_interface_descriptor_data *desc; + dri_bo *bo; + int pp_index = pp_context->current_pp; + unsigned char *cc_ptr; + + bo = pp_context->dynamic_state.bo; + + dri_bo_map(bo, 1); + assert(bo->virtual); + cc_ptr = (unsigned char *)bo->virtual + pp_context->idrt_offset; + + desc = (struct gen8_interface_descriptor_data *) cc_ptr + + pp_context->idrt.num_interface_descriptors; + + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = + pp_context->pp_modules[pp_index].kernel.kernel_offset >> 6; /* reloc 
*/ + desc->desc2.single_program_flow = 1; + desc->desc2.floating_point_mode = FLOATING_POINT_IEEE_754; + desc->desc3.sampler_count = 0; /* 1 - 4 samplers used */ + desc->desc3.sampler_state_pointer = pp_context->sampler_offset >> 5; + desc->desc4.binding_table_entry_count = 0; + desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5); + desc->desc5.constant_urb_entry_read_offset = 0; + + desc->desc5.constant_urb_entry_read_length = 6; /* grf 1-6 */ + + dri_bo_unmap(bo); + pp_context->idrt.num_interface_descriptors++; +} + + +static void +gen8_pp_upload_constants(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + unsigned char *constant_buffer; + int param_size; + + assert(sizeof(struct gen7_pp_static_parameter) == 192); + + param_size = sizeof(struct gen7_pp_static_parameter); + + dri_bo_map(pp_context->dynamic_state.bo, 1); + assert(pp_context->dynamic_state.bo->virtual); + constant_buffer = (unsigned char *) pp_context->dynamic_state.bo->virtual + + pp_context->curbe_offset; + + memcpy(constant_buffer, pp_context->pp_static_parameter, param_size); + dri_bo_unmap(pp_context->dynamic_state.bo); + return; +} + +static void +gen8_pp_states_setup(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + gen8_pp_interface_descriptor_table(ctx, pp_context); + gen8_pp_upload_constants(ctx, pp_context); +} + +static void +gen6_pp_pipeline_select(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + ADVANCE_BATCH(batch); +} + +static void +gen8_pp_state_base_address(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 16); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2)); + /* DW1 Generate state address */ + OUT_BATCH(batch, 0 | 
BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW4. Surface state address */ + OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, 0); + /* DW6. Dynamic state address */ + OUT_RELOC(batch, pp_context->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, + 0, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /* DW8. Indirect object address */ + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /* DW10. Instruction base address */ + OUT_RELOC(batch, pp_context->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); + ADVANCE_BATCH(batch); +} + +static void +gen8_pp_vfe_state(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 | + pp_context->vfe_gpu_state.num_urb_entries << 8); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + (pp_context->vfe_gpu_state.urb_entry_size) << 16 | + /* URB Entry Allocation Size, in 256 bits unit */ + (pp_context->vfe_gpu_state.curbe_allocation_size)); + /* CURBE Allocation Size, in 256 bits unit */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_interface_descriptor_load(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + BEGIN_BATCH(batch, 6); + + OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH); 
+ OUT_BATCH(batch, 0); + + OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + pp_context->idrt.num_interface_descriptors * sizeof(struct gen8_interface_descriptor_data)); + OUT_BATCH(batch, pp_context->idrt_offset); + ADVANCE_BATCH(batch); +} + +static void +gen8_pp_curbe_load(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + struct i965_driver_data *i965 = i965_driver_data(ctx); + int param_size = 64; + + param_size = sizeof(struct gen7_pp_static_parameter); + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, + param_size); + OUT_BATCH(batch, pp_context->curbe_offset); + ADVANCE_BATCH(batch); +} + +static void +gen8_pp_object_walker(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = pp_context->batch; + int x, x_steps, y, y_steps; + int param_size, command_length_in_dws, extra_cmd_in_dws; + dri_bo *command_buffer; + unsigned int *command_ptr; + + param_size = sizeof(struct gen7_pp_inline_parameter); + + x_steps = pp_context->pp_x_steps(pp_context->private_context); + y_steps = pp_context->pp_y_steps(pp_context->private_context); + command_length_in_dws = 6 + (param_size >> 2); + extra_cmd_in_dws = 2; + command_buffer = dri_bo_alloc(i965->intel.bufmgr, + "command objects buffer", + (command_length_in_dws + extra_cmd_in_dws) * 4 * x_steps * y_steps + 64, + 4096); + + dri_bo_map(command_buffer, 1); + command_ptr = command_buffer->virtual; + + for (y = 0; y < y_steps; y++) { + for (x = 0; x < x_steps; x++) { + if (!pp_context->pp_set_block_parameter(pp_context, x, y)) { + + *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2)); + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 
0; + memcpy(command_ptr, pp_context->pp_inline_parameter, param_size); + command_ptr += (param_size >> 2); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + } + } + } + + if ((command_length_in_dws + extra_cmd_in_dws) * x_steps * y_steps % 2 == 0) + *command_ptr++ = 0; + + *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; + + dri_bo_unmap(command_buffer); + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_RELOC(batch, command_buffer, + I915_GEM_DOMAIN_COMMAND, 0, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + dri_bo_unreference(command_buffer); + + /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END + * will cause control to pass back to ring buffer + */ + intel_batchbuffer_end_atomic(batch); + intel_batchbuffer_flush(batch); + intel_batchbuffer_start_atomic(batch, 0x1000); +} + +static void +gen8_pp_pipeline_setup(VADriverContextP ctx, + struct i965_post_processing_context *pp_context) +{ + struct intel_batchbuffer *batch = pp_context->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen6_pp_pipeline_select(ctx, pp_context); + gen8_pp_state_base_address(ctx, pp_context); + gen8_pp_vfe_state(ctx, pp_context); + gen8_pp_curbe_load(ctx, pp_context); + gen8_interface_descriptor_load(ctx, pp_context); + gen8_pp_vfe_state(ctx, pp_context); + gen8_pp_object_walker(ctx, pp_context); + intel_batchbuffer_end_atomic(batch); +} + +static VAStatus +gen8_post_processing( + VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + int pp_index, + void * filter_param +) +{ + VAStatus va_status; + + va_status = gen8_pp_initialize(ctx, pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + pp_index, + filter_param); + + if (va_status == VA_STATUS_SUCCESS) { + 
gen8_pp_states_setup(ctx, pp_context); + gen8_pp_pipeline_setup(ctx, pp_context); + } + + return va_status; +} + +static void +gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_context) +{ + dri_bo_unreference(pp_context->surface_state_binding_table.bo); + pp_context->surface_state_binding_table.bo = NULL; + + dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo); + pp_context->pp_dndi_context.stmm_bo = NULL; + + dri_bo_unreference(pp_context->pp_dn_context.stmm_bo); + pp_context->pp_dn_context.stmm_bo = NULL; + + if (pp_context->instruction_state.bo) { + dri_bo_unreference(pp_context->instruction_state.bo); + pp_context->instruction_state.bo = NULL; + } + + if (pp_context->indirect_state.bo) { + dri_bo_unreference(pp_context->indirect_state.bo); + pp_context->indirect_state.bo = NULL; + } + + if (pp_context->dynamic_state.bo) { + dri_bo_unreference(pp_context->dynamic_state.bo); + pp_context->dynamic_state.bo = NULL; + } + + free(pp_context->pp_static_parameter); + free(pp_context->pp_inline_parameter); + pp_context->pp_static_parameter = NULL; + pp_context->pp_inline_parameter = NULL; +} + +#define VPP_CURBE_ALLOCATION_SIZE 32 + +void +gen8_post_processing_context_init(VADriverContextP ctx, + void *data, + struct intel_batchbuffer *batch) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + int i, kernel_size; + unsigned int kernel_offset, end_offset; + unsigned char *kernel_ptr; + struct pp_module *pp_module; + struct i965_post_processing_context *pp_context = data; + + { + pp_context->vfe_gpu_state.max_num_threads = 60; + pp_context->vfe_gpu_state.num_urb_entries = 59; + pp_context->vfe_gpu_state.gpgpu_mode = 0; + pp_context->vfe_gpu_state.urb_entry_size = 16 - 1; + pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE; + } + + pp_context->intel_post_processing = gen8_post_processing; + pp_context->finalize = gen8_post_processing_context_finalize; + + assert(NUM_PP_MODULES == 
ARRAY_ELEMS(pp_modules_gen8)); + + memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules)); + + kernel_size = 4096 ; + + for (i = 0; i < NUM_PP_MODULES; i++) { + pp_module = &pp_context->pp_modules[i]; + + if (pp_module->kernel.bin && pp_module->kernel.size) { + kernel_size += pp_module->kernel.size; + } + } + + pp_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, + "kernel shader", + kernel_size, + 0x1000); + if (pp_context->instruction_state.bo == NULL) { + WARN_ONCE("failure to allocate the buffer space for kernel shader in VPP\n"); + return; + } + + assert(pp_context->instruction_state.bo); + + + pp_context->instruction_state.bo_size = kernel_size; + pp_context->instruction_state.end_offset = 0; + end_offset = 0; + + dri_bo_map(pp_context->instruction_state.bo, 1); + kernel_ptr = (unsigned char *)(pp_context->instruction_state.bo->virtual); + + for (i = 0; i < NUM_PP_MODULES; i++) { + pp_module = &pp_context->pp_modules[i]; + + kernel_offset = ALIGN(end_offset, 64); + pp_module->kernel.kernel_offset = kernel_offset; + + if (pp_module->kernel.bin && pp_module->kernel.size) { + + memcpy(kernel_ptr + kernel_offset, pp_module->kernel.bin, pp_module->kernel.size); + end_offset = kernel_offset + pp_module->kernel.size; + } + } + + pp_context->instruction_state.end_offset = ALIGN(end_offset, 64); + + dri_bo_unmap(pp_context->instruction_state.bo); + + /* static & inline parameters */ + pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); + pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); + + pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE; + pp_context->pp_dndi_context.current_out_obj_surface = NULL; + pp_context->pp_dndi_context.frame_order = -1; + pp_context->batch = batch; + + pp_context->idrt_size = 5 * sizeof(struct gen8_interface_descriptor_data); + pp_context->curbe_size = 256; +} diff --git a/src/gen8_render.c b/src/gen8_render.c 
new file mode 100644 index 0000000..9c49cbc --- /dev/null +++ b/src/gen8_render.c @@ -0,0 +1,1824 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * Keith Packard <keithp@keithp.com> + * Xiang Haihao <haihao.xiang@intel.com> + * Zhao Yakui <yakui.zhao@intel.com> + * + */ + +/* + * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <math.h> + +#include <va/va_drmcommon.h> + +#include "intel_batchbuffer.h" +#include "intel_driver.h" +#include "i965_defines.h" +#include "i965_drv_video.h" +#include "i965_structs.h" + +#include "i965_render.h" + +#define SF_KERNEL_NUM_GRF 16 +#define SF_MAX_THREADS 1 + +#define PS_KERNEL_NUM_GRF 48 +#define PS_MAX_THREADS 32 + +/* Programs for Gen8 */ +static const uint32_t sf_kernel_static_gen8[][4] ={ + +}; +static const uint32_t ps_kernel_static_gen8[][4] = { +#include "shaders/render/exa_wm_src_affine.g8b" +#include "shaders/render/exa_wm_src_sample_planar.g8b" +#include "shaders/render/exa_wm_yuv_color_balance.g8b" +#include "shaders/render/exa_wm_yuv_rgb.g8b" +#include "shaders/render/exa_wm_write.g8b" +}; + +static const uint32_t ps_subpic_kernel_static_gen8[][4] = { +#include "shaders/render/exa_wm_src_affine.g8b" +#include "shaders/render/exa_wm_src_sample_argb.g8b" +#include "shaders/render/exa_wm_write.g8b" +}; + + +#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 + +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) + +enum { + SF_KERNEL = 0, + PS_KERNEL, + PS_SUBPIC_KERNEL +}; + +static struct i965_kernel render_kernels_gen8[] = { + { + "SF", + SF_KERNEL, + sf_kernel_static_gen8, + sizeof(sf_kernel_static_gen8), + NULL + }, + { + "PS", + PS_KERNEL, + ps_kernel_static_gen8, + sizeof(ps_kernel_static_gen8), + NULL + }, + + { + "PS_SUBPIC", + PS_SUBPIC_KERNEL, + ps_subpic_kernel_static_gen8, + sizeof(ps_subpic_kernel_static_gen8), + NULL + } +}; + +#define URB_VS_ENTRIES 8 +#define 
URB_VS_ENTRY_SIZE 1 + +#define URB_GS_ENTRIES 0 +#define URB_GS_ENTRY_SIZE 0 + +#define URB_CLIP_ENTRIES 0 +#define URB_CLIP_ENTRY_SIZE 0 + +#define URB_SF_ENTRIES 1 +#define URB_SF_ENTRY_SIZE 2 + +#define URB_CS_ENTRIES 4 +#define URB_CS_ENTRY_SIZE 4 + +static float yuv_to_rgb_bt601[3][4] = { +{1.164, 0, 1.596, -0.06275,}, +{1.164, -0.392, -0.813, -0.50196,}, +{1.164, 2.017, 0, -0.50196,}, +}; + +static float yuv_to_rgb_bt709[3][4] = { +{1.164, 0, 1.793, -0.06275,}, +{1.164, -0.213, -0.533, -0.50196,}, +{1.164, 2.112, 0, -0.50196,}, +}; + +static float yuv_to_rgb_smpte_240[3][4] = { +{1.164, 0, 1.794, -0.06275,}, +{1.164, -0.258, -0.5425, -0.50196,}, +{1.164, 2.078, 0, -0.50196,}, +}; + + +static void +gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +/* Set "Shader Channel Select" for GEN8+ */ +void +gen8_render_set_surface_scs(struct gen8_surface_state *ss) +{ + ss->ss7.shader_chanel_select_r = HSW_SCS_RED; + ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; + ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; + ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; +} + +static void +gen8_render_set_surface_state( + struct gen8_surface_state *ss, + dri_bo *bo, + unsigned long offset, + int width, + int height, + int pitch, + int format, + unsigned int flags +) +{ + unsigned int tiling; + unsigned int swizzle; + + memset(ss, 0, sizeof(*ss)); + + switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) { + case I965_PP_FLAG_BOTTOM_FIELD: + ss->ss0.vert_line_stride_ofs = 1; + /* fall-through */ + case I965_PP_FLAG_TOP_FIELD: + ss->ss0.vert_line_stride = 1; + height /= 2; + break; + } + + ss->ss0.surface_type = 
I965_SURFACE_2D; + ss->ss0.surface_format = format; + + ss->ss8.base_addr = bo->offset + offset; + + ss->ss2.width = width - 1; + ss->ss2.height = height - 1; + + ss->ss3.pitch = pitch - 1; + + /* Always set 1(align 4 mode) per B-spec */ + ss->ss0.vertical_alignment = 1; + ss->ss0.horizontal_alignment = 1; + + dri_bo_get_tiling(bo, &tiling, &swizzle); + gen8_render_set_surface_tiling(ss, tiling); +} + +static void +gen8_render_src_surface_state( + VADriverContextP ctx, + int index, + dri_bo *region, + unsigned long offset, + int w, + int h, + int pitch, + int format, + unsigned int flags +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + void *ss; + dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo; + + assert(index < MAX_RENDER_SURFACES); + + dri_bo_map(ss_bo, 1); + assert(ss_bo->virtual); + ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); + + gen8_render_set_surface_state(ss, + region, offset, + w, h, + pitch, format, flags); + gen8_render_set_surface_scs(ss); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + offset, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), + region); + + ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss_bo); + render_state->wm.sampler_count++; +} + +static void +gen8_render_src_surfaces_state( + VADriverContextP ctx, + struct object_surface *obj_surface, + unsigned int flags +) +{ + int region_pitch; + int rw, rh; + dri_bo *region; + + region_pitch = obj_surface->width; + rw = obj_surface->orig_width; + rh = obj_surface->orig_height; + region = obj_surface->bo; + + gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ + gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); + + if (obj_surface->fourcc == VA_FOURCC_Y800) 
/* single plane for grayscale */ + return; + + if (obj_surface->fourcc == VA_FOURCC_NV12) { + gen8_render_src_surface_state(ctx, 3, region, + region_pitch * obj_surface->y_cb_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */ + gen8_render_src_surface_state(ctx, 4, region, + region_pitch * obj_surface->y_cb_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8G8_UNORM, flags); + } else { + gen8_render_src_surface_state(ctx, 3, region, + region_pitch * obj_surface->y_cb_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8_UNORM, flags); /* U */ + gen8_render_src_surface_state(ctx, 4, region, + region_pitch * obj_surface->y_cb_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8_UNORM, flags); + gen8_render_src_surface_state(ctx, 5, region, + region_pitch * obj_surface->y_cr_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8_UNORM, flags); /* V */ + gen8_render_src_surface_state(ctx, 6, region, + region_pitch * obj_surface->y_cr_offset, + obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, + I965_SURFACEFORMAT_R8_UNORM, flags); + } +} + +static void +gen8_subpic_render_src_surfaces_state(VADriverContextP ctx, + struct object_surface *obj_surface) +{ + dri_bo *subpic_region; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + struct object_image *obj_image = obj_subpic->obj_image; + + assert(obj_surface); + assert(obj_surface->bo); + subpic_region = obj_image->bo; + /*subpicture surface*/ + gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0); + 
gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0); +} + +static void +gen8_render_dest_surface_state(VADriverContextP ctx, int index) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct intel_region *dest_region = render_state->draw_region; + void *ss; + dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo; + int format; + assert(index < MAX_RENDER_SURFACES); + + if (dest_region->cpp == 2) { + format = I965_SURFACEFORMAT_B5G6R5_UNORM; + } else { + format = I965_SURFACEFORMAT_B8G8R8A8_UNORM; + } + + dri_bo_map(ss_bo, 1); + assert(ss_bo->virtual); + ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); + + gen8_render_set_surface_state(ss, + dest_region->bo, 0, + dest_region->width, dest_region->height, + dest_region->pitch, format, 0); + gen8_render_set_surface_scs(ss); + dri_bo_emit_reloc(ss_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8), + dest_region->bo); + + ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(ss_bo); +} + +static void +i965_fill_vertex_buffer( + VADriverContextP ctx, + float tex_coords[4], /* [(u1,v1);(u2,v2)] */ + float vid_coords[4] /* [(x1,y1);(x2,y2)] */ +) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + float vb[12]; + + enum { X1, Y1, X2, Y2 }; + + static const unsigned int g_rotation_indices[][6] = { + [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 }, + [VA_ROTATION_90] = { X2, Y1, X2, Y2, X1, Y2 }, + [VA_ROTATION_180] = { X1, Y1, X2, Y1, X2, Y2 }, + [VA_ROTATION_270] = { X1, Y2, X1, Y1, X2, Y1 }, + }; + + const unsigned int * const rotation_indices = + g_rotation_indices[i965->rotation_attrib->value]; + + vb[0] = tex_coords[rotation_indices[0]]; /* bottom-right corner */ + vb[1] = 
tex_coords[rotation_indices[1]]; + vb[2] = vid_coords[X2]; + vb[3] = vid_coords[Y2]; + + vb[4] = tex_coords[rotation_indices[2]]; /* bottom-left corner */ + vb[5] = tex_coords[rotation_indices[3]]; + vb[6] = vid_coords[X1]; + vb[7] = vid_coords[Y2]; + + vb[8] = tex_coords[rotation_indices[4]]; /* top-left corner */ + vb[9] = tex_coords[rotation_indices[5]]; + vb[10] = vid_coords[X1]; + vb[11] = vid_coords[Y1]; + + dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb); +} + +static void +i965_subpic_render_upload_vertex(VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *output_rect) +{ + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + float tex_coords[4], vid_coords[4]; + VARectangle dst_rect; + + if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD) + dst_rect = obj_subpic->dst_rect; + else { + const float sx = (float)output_rect->width / obj_surface->orig_width; + const float sy = (float)output_rect->height / obj_surface->orig_height; + dst_rect.x = output_rect->x + sx * obj_subpic->dst_rect.x; + dst_rect.y = output_rect->y + sy * obj_subpic->dst_rect.y; + dst_rect.width = sx * obj_subpic->dst_rect.width; + dst_rect.height = sy * obj_subpic->dst_rect.height; + } + + tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width; + tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height; + tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width; + tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height; + + vid_coords[0] = dst_rect.x; + vid_coords[1] = dst_rect.y; + vid_coords[2] = (float)(dst_rect.x + dst_rect.width); + vid_coords[3] = (float)(dst_rect.y + dst_rect.height); + + i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); +} + +static void +i965_render_upload_vertex( + VADriverContextP ctx, + struct object_surface 
*obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct intel_region *dest_region = render_state->draw_region; + float tex_coords[4], vid_coords[4]; + int width, height; + + width = obj_surface->orig_width; + height = obj_surface->orig_height; + + tex_coords[0] = (float)src_rect->x / width; + tex_coords[1] = (float)src_rect->y / height; + tex_coords[2] = (float)(src_rect->x + src_rect->width) / width; + tex_coords[3] = (float)(src_rect->y + src_rect->height) / height; + + vid_coords[0] = dest_region->x + dst_rect->x; + vid_coords[1] = dest_region->y + dst_rect->y; + vid_coords[2] = vid_coords[0] + dst_rect->width; + vid_coords[3] = vid_coords[1] + dst_rect->height; + + i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); +} + +static void +i965_render_drawing_rectangle(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + struct intel_region *dest_region = render_state->draw_region; + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2); + OUT_BATCH(batch, 0x00000000); + OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16); + OUT_BATCH(batch, 0x00000000); + ADVANCE_BATCH(batch); +} + +static void +i965_render_upload_image_palette( + VADriverContextP ctx, + struct object_image *obj_image, + unsigned int alpha +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + unsigned int i; + + assert(obj_image); + + if (!obj_image) + return; + + if (obj_image->image.num_palette_entries == 0) + return; + + BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries); + OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1)); + /*fill palette*/ + 
//int32_t out[16]; //0-23:color 23-31:alpha + for (i = 0; i < obj_image->image.num_palette_entries; i++) + OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]); + ADVANCE_BATCH(batch); +} + +static void +gen8_clear_dest_region(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + struct intel_region *dest_region = render_state->draw_region; + unsigned int blt_cmd, br13; + int pitch; + + blt_cmd = GEN8_XY_COLOR_BLT_CMD; + br13 = 0xf0 << 16; + pitch = dest_region->pitch; + + if (dest_region->cpp == 4) { + br13 |= BR13_8888; + blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA); + } else { + assert(dest_region->cpp == 2); + br13 |= BR13_565; + } + + if (dest_region->tiling != I915_TILING_NONE) { + blt_cmd |= XY_COLOR_BLT_DST_TILED; + pitch /= 4; + } + + br13 |= pitch; + + intel_batchbuffer_start_atomic_blt(batch, 24); + BEGIN_BLT_BATCH(batch, 7); + + OUT_BATCH(batch, blt_cmd); + OUT_BATCH(batch, br13); + OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x)); + OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) | + (dest_region->x + dest_region->width)); + OUT_RELOC(batch, dest_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(batch, 0x0); + OUT_BATCH(batch, 0x0); + ADVANCE_BATCH(batch); + intel_batchbuffer_end_atomic(batch); +} + + +/* + * for GEN8 + */ +#define ALIGNMENT 64 + +static void +gen8_render_initialize(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + dri_bo *bo; + int size; + unsigned int end_offset; + + /* VERTEX BUFFER */ + dri_bo_unreference(render_state->vb.vertex_buffer); + bo = dri_bo_alloc(i965->intel.bufmgr, + "vertex buffer", + 4096, + 4096); + assert(bo); + render_state->vb.vertex_buffer = bo; + + /* WM */ + 
dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES, + 4096); + assert(bo); + render_state->wm.surface_state_binding_table_bo = bo; + + render_state->curbe_size = 256; + + render_state->wm.sampler_count = 0; + + render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state); + + render_state->cc_state_size = sizeof(struct gen6_color_calc_state); + + render_state->cc_viewport_size = sizeof(struct i965_cc_viewport); + + render_state->blend_state_size = sizeof(struct gen8_global_blend_state) + + 16 * sizeof(struct gen8_blend_state_rt); + + render_state->sf_clip_size = 1024; + + render_state->scissor_size = 1024; + + size = ALIGN(render_state->curbe_size, ALIGNMENT) + + ALIGN(render_state->sampler_size, ALIGNMENT) + + ALIGN(render_state->cc_viewport_size, ALIGNMENT) + + ALIGN(render_state->cc_state_size, ALIGNMENT) + + ALIGN(render_state->blend_state_size, ALIGNMENT) + + ALIGN(render_state->sf_clip_size, ALIGNMENT) + + ALIGN(render_state->scissor_size, ALIGNMENT); + + dri_bo_unreference(render_state->dynamic_state.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "dynamic_state", + size, + 4096); + + render_state->dynamic_state.bo = bo; + + end_offset = 0; + render_state->dynamic_state.end_offset = 0; + + /* Constant buffer offset */ + render_state->curbe_offset = end_offset; + end_offset += ALIGN(render_state->curbe_size, ALIGNMENT); + + /* Sampler_state */ + render_state->sampler_offset = end_offset; + end_offset += ALIGN(render_state->sampler_size, ALIGNMENT); + + /* CC_VIEWPORT_state */ + render_state->cc_viewport_offset = end_offset; + end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT); + + /* CC_STATE_state */ + render_state->cc_state_offset = end_offset; + end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT); + + /* Blend_state */ + render_state->blend_state_offset = 
end_offset; + end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT); + + /* SF_CLIP_state */ + render_state->sf_clip_offset = end_offset; + end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT); + + /* SCISSOR_state */ + render_state->scissor_offset = end_offset; + end_offset += ALIGN(render_state->scissor_size, ALIGNMENT); + + /* update the end offset of dynamic_state */ + render_state->dynamic_state.end_offset = end_offset; + +} + +static void +gen8_render_sampler(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen8_sampler_state *sampler_state; + int i; + unsigned char *cc_ptr; + + assert(render_state->wm.sampler_count > 0); + assert(render_state->wm.sampler_count <= MAX_SAMPLERS); + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->sampler_offset; + + sampler_state = (struct gen8_sampler_state *) cc_ptr; + + for (i = 0; i < render_state->wm.sampler_count; i++) { + memset(sampler_state, 0, sizeof(*sampler_state)); + sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR; + sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP; + sampler_state++; + } + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen8_global_blend_state *global_blend_state; + struct gen8_blend_state_rt *blend_state; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char 
*) render_state->dynamic_state.bo->virtual + + render_state->blend_state_offset; + + global_blend_state = (struct gen8_global_blend_state*) cc_ptr; + + memset(global_blend_state, 0, render_state->blend_state_size); + /* Global blend state + blend_state for Render Target */ + blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); + blend_state->blend1.logic_op_enable = 1; + blend_state->blend1.logic_op_func = 0xc; + blend_state->blend1.pre_blend_clamp_enable = 1; + + dri_bo_unmap(render_state->dynamic_state.bo); +} + + +static void +gen8_render_cc_viewport(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct i965_cc_viewport *cc_viewport; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->cc_viewport_offset; + + cc_viewport = (struct i965_cc_viewport *) cc_ptr; + + memset(cc_viewport, 0, sizeof(*cc_viewport)); + + cc_viewport->min_depth = -1.e35; + cc_viewport->max_depth = 1.e35; + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_render_color_calc_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen6_color_calc_state *color_calc_state; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->cc_state_offset; + + color_calc_state = (struct gen6_color_calc_state *) cc_ptr; + + memset(color_calc_state, 0, sizeof(*color_calc_state)); + color_calc_state->constant_r = 1.0; + color_calc_state->constant_g = 0.0; + color_calc_state->constant_b = 1.0; + color_calc_state->constant_a = 1.0; + 
dri_bo_unmap(render_state->dynamic_state.bo); +} + +#define PI 3.1415926 + +static void +gen8_render_upload_constants(VADriverContextP ctx, + struct object_surface *obj_surface, + unsigned int flags) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + unsigned short *constant_buffer; + unsigned char *cc_ptr; + float *color_balance_base; + float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; + float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ + float hue = (float)i965->hue_attrib->value / 180 * PI; + float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; + float *yuv_to_rgb; + unsigned int color_flag; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->curbe_offset; + + constant_buffer = (unsigned short *) cc_ptr; + + if (obj_surface->subsampling == SUBSAMPLE_YUV400) { + assert(obj_surface->fourcc == VA_FOURCC_Y800); + + *constant_buffer = 2; + } else { + if (obj_surface->fourcc == VA_FOURCC_NV12) + *constant_buffer = 1; + else + *constant_buffer = 0; + } + + if (i965->contrast_attrib->value == DEFAULT_CONTRAST && + i965->brightness_attrib->value == DEFAULT_BRIGHTNESS && + i965->hue_attrib->value == DEFAULT_HUE && + i965->saturation_attrib->value == DEFAULT_SATURATION) + constant_buffer[1] = 1; /* skip color balance transformation */ + else + constant_buffer[1] = 0; + + color_balance_base = (float *)constant_buffer + 4; + *color_balance_base++ = contrast; + *color_balance_base++ = brightness; + *color_balance_base++ = cos(hue) * contrast * saturation; + *color_balance_base++ = sin(hue) * contrast * saturation; + + color_flag = flags & VA_SRC_COLOR_MASK; + yuv_to_rgb = (float *)constant_buffer + 8; + if (color_flag == VA_SRC_BT709) + memcpy(yuv_to_rgb, 
yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); + else if (color_flag == VA_SRC_SMPTE_240) + memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240)); + else + memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_render_setup_states( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect, + unsigned int flags +) +{ + gen8_render_dest_surface_state(ctx, 0); + gen8_render_src_surfaces_state(ctx, obj_surface, flags); + gen8_render_sampler(ctx); + gen8_render_cc_viewport(ctx); + gen8_render_color_calc_state(ctx); + gen8_render_blend_state(ctx); + gen8_render_upload_constants(ctx, obj_surface, flags); + i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); +} + +static void +gen8_emit_state_base_address(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 16); + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2)); + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /*DW4 */ + OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, 0); + + /*DW6*/ + /* Dynamic state base address */ + OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, + 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /*DW8*/ + OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */ + OUT_BATCH(batch, 0); + + /*DW10 */ + /* Instruction base address */ + OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); + OUT_BATCH(batch, 0); + + /*DW12 */ + OUT_BATCH(batch, 
0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ + OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_cc_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2)); + OUT_BATCH(batch, (render_state->cc_state_offset + 1)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); + OUT_BATCH(batch, (render_state->blend_state_offset + 1)); + ADVANCE_BATCH(batch); + +} + +static void +gen8_emit_vertices(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2)); + OUT_BATCH(batch, + (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) | + (0 << GEN8_VB0_MOCS_SHIFT) | + GEN7_VB0_ADDRESS_MODIFYENABLE | + ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); + OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 12 * 4); + ADVANCE_BATCH(batch); + + /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2)); + OUT_BATCH(batch, + _3DPRIM_RECTLIST); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 7); + OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2)); + OUT_BATCH(batch, + GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(batch, 3); /* vertex count per instance */ + 
OUT_BATCH(batch, 0); /* start vertex offset */ + OUT_BATCH(batch, 1); /* single instance */ + OUT_BATCH(batch, 0); /* start instance location */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_vertex_element_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* + * The VUE layout + * dword 0-3: pad (0, 0, 0. 0) + * dword 4-7: position (x, y, 1.0, 1.0), + * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0) + */ + + /* Set up our vertex elements, sourced from the single vertex buffer. */ + OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2)); + + /* Element state 0. These are 4 dwords of 0 required for the VUE format. + * We don't really know or care what they do. + */ + + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN8_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); + + /* offset 8: X, Y -> {x, y, 1.0, 1.0} */ + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN8_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + + /* offset 0: u,v -> {U, V, 1.0, 1.0} */ + OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN8_VE0_VALID | + (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + 
(I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); +} + +static void +gen8_emit_vs_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* disable VS constant buffer */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 0 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 1 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 2 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* CS Buffer 3 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2)); + OUT_BATCH(batch, 0); /* without VS kernel */ + OUT_BATCH(batch, 0); + /* VS shader dispatch flag */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW6. 
VS shader GRF and URB buffer definition */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + +} + +/* + * URB layout on GEN8 + * ---------------------------------------- + * | PS Push Constants (8KB) | VS entries | + * ---------------------------------------- + */ +static void +gen8_emit_urb(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + unsigned int num_urb_entries = 64; + + /* The minimum urb entries is 64 */ + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Size is 8Kbs and base address is 0Kb */ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); + /* Size is 8Kbs and base address is 0Kb */ + OUT_BATCH(batch, + (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) | + (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2)); + OUT_BATCH(batch, + (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | + (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | + (4 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); 
+ + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (5 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (6 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2)); + OUT_BATCH(batch, + (0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (7 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_bypass_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + /* bypass GS */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 10); + OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2)); + /* GS shader address */ + OUT_BATCH(batch, 0); /* without GS kernel */ + OUT_BATCH(batch, 0); + /* DW3. GS shader dispatch flag */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW6. 
GS shader GRF and URB offset/length */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* disable HS */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2)); + OUT_BATCH(batch, 0); + /*DW2. HS pass-through */ + OUT_BATCH(batch, 0); + /*DW3. HS shader address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /*DW5. HS shader flag. 
URB offset/length and so on */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable TE */ + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable DS */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2)); + /* DW1. DS shader pointer */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW3-5. DS shader dispatch flag.*/ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW8. 
DS shader output URB */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Disable STREAMOUT */ + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_invarient_states(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 1); + OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2)); + OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + ADVANCE_BATCH(batch); + + /* Update 3D Multisample pattern */ + BEGIN_BATCH(batch, 9); + OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(batch, 1); + ADVANCE_BATCH(batch); + + /* Set system instruction pointer */ + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, CMD_STATE_SIP | 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_clip_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(batch, 0); + 
OUT_BATCH(batch, 0); /* pass-through */ + OUT_BATCH(batch, 0); +} + +static void +gen8_emit_sf_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2)); + OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2)); + OUT_BATCH(batch, + (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) | + (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) | + (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) | + (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) | + (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* SBE for backend setup */ + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_wm_state(VADriverContextP ctx, int kernel) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + unsigned int num_samples = 0; + unsigned int max_threads; + + max_threads = i965->intel.device_info->max_wm_threads - 2; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2)); + OUT_BATCH(batch, + (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE)); + ADVANCE_BATCH(batch); + + if (kernel == PS_KERNEL) { + 
BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); + OUT_BATCH(batch, + GEN8_PS_BLEND_HAS_WRITEABLE_RT); + ADVANCE_BATCH(batch); + } else if (kernel == PS_SUBPIC_KERNEL) { + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2)); + OUT_BATCH(batch, + (GEN8_PS_BLEND_HAS_WRITEABLE_RT | + GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE | + (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) | + (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) | + (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) | + (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT))); + ADVANCE_BATCH(batch); + } + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2)); + OUT_BATCH(batch, + GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 11); + OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2)); + OUT_BATCH(batch, URB_CS_ENTRY_SIZE); + OUT_BATCH(batch, 0); + /*DW3-4. Constant buffer 0 */ + OUT_BATCH(batch, render_state->curbe_offset); + OUT_BATCH(batch, 0); + + /*DW5-10. Constant buffer 1-3 */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 12); + OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2)); + /* PS shader address */ + OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset); + + OUT_BATCH(batch, 0); + /* DW3. PS shader flag .Binding table cnt/sample cnt */ + OUT_BATCH(batch, + (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | + (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + /* DW4-5. Scatch space */ + OUT_BATCH(batch, 0); /* scratch space base offset */ + OUT_BATCH(batch, 0); + /* DW6. PS shader threads. */ + OUT_BATCH(batch, + ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples | + GEN7_PS_PUSH_CONSTANT_ENABLE | + GEN7_PS_16_DISPATCH_ENABLE); + /* DW7. 
PS shader GRF */ + OUT_BATCH(batch, + (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); + OUT_BATCH(batch, 0); /* kernel 1 pointer */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* kernel 2 pointer */ + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); + OUT_BATCH(batch, BINDING_TABLE_OFFSET); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_depth_buffer_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 8); + OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2)); + OUT_BATCH(batch, + (I965_DEPTHFORMAT_D32_FLOAT << 18) | + (I965_SURFACE_NULL << 29)); + /* DW2-3. Depth Buffer Address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + /* DW4-7. Surface structure */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Update the Hier Depth buffer */ + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + /* Update the stencil buffer */ + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_depth_stencil_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void 
+gen8_emit_wm_hz_op(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + BEGIN_BATCH(batch, 5); + OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_viewport_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); + OUT_BATCH(batch, render_state->cc_viewport_offset); + ADVANCE_BATCH(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); +} + +static void +gen8_emit_sampler_state_pointers(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + struct i965_render_state *render_state = &i965->render_state; + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); + OUT_BATCH(batch, render_state->sampler_offset); + ADVANCE_BATCH(batch); +} + + +static void +gen7_emit_drawing_rectangle(VADriverContextP ctx) +{ + i965_render_drawing_rectangle(ctx); +} + +static void +gen8_render_emit_states(VADriverContextP ctx, int kernel) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + intel_batchbuffer_start_atomic(batch, 0x1000); + intel_batchbuffer_emit_mi_flush(batch); + gen8_emit_invarient_states(ctx); + gen8_emit_state_base_address(ctx); + gen8_emit_viewport_state_pointers(ctx); + gen8_emit_urb(ctx); + gen8_emit_cc_state_pointers(ctx); + gen8_emit_sampler_state_pointers(ctx); + gen8_emit_wm_hz_op(ctx); + gen8_emit_bypass_state(ctx); 
+ gen8_emit_vs_state(ctx); + gen8_emit_clip_state(ctx); + gen8_emit_sf_state(ctx); + gen8_emit_depth_stencil_state(ctx); + gen8_emit_wm_state(ctx, kernel); + gen8_emit_depth_buffer_state(ctx); + gen7_emit_drawing_rectangle(ctx); + gen8_emit_vertex_element_state(ctx); + gen8_emit_vertices(ctx); + intel_batchbuffer_end_atomic(batch); +} + +static void +gen8_render_put_surface( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect, + unsigned int flags +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct intel_batchbuffer *batch = i965->batch; + + gen8_render_initialize(ctx); + gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags); + gen8_clear_dest_region(ctx); + gen8_render_emit_states(ctx, PS_KERNEL); + intel_batchbuffer_flush(batch); +} + +static void +gen8_subpicture_render_blend_state(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + struct gen8_global_blend_state *global_blend_state; + struct gen8_blend_state_rt *blend_state; + unsigned char *cc_ptr; + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->blend_state_offset; + + global_blend_state = (struct gen8_global_blend_state*) cc_ptr; + + memset(global_blend_state, 0, render_state->blend_state_size); + /* Global blend state + blend_state for Render Target */ + blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1); + blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD; + blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; + blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; + blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD; + blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA; + 
blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA; + blend_state->blend0.colorbuf_blend = 1; + blend_state->blend1.post_blend_clamp_enable = 1; + blend_state->blend1.pre_blend_clamp_enable = 1; + blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */ + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_subpic_render_upload_constants(VADriverContextP ctx, + struct object_surface *obj_surface) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + float *constant_buffer; + float global_alpha = 1.0; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + unsigned char *cc_ptr; + + if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) { + global_alpha = obj_subpic->global_alpha; + } + + + dri_bo_map(render_state->dynamic_state.bo, 1); + assert(render_state->dynamic_state.bo->virtual); + + cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual + + render_state->curbe_offset; + + constant_buffer = (float *) cc_ptr; + *constant_buffer = global_alpha; + + dri_bo_unmap(render_state->dynamic_state.bo); +} + +static void +gen8_subpicture_render_setup_states( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) +{ + gen8_render_dest_surface_state(ctx, 0); + gen8_subpic_render_src_surfaces_state(ctx, obj_surface); + gen8_render_sampler(ctx); + gen8_render_cc_viewport(ctx); + gen8_render_color_calc_state(ctx); + gen8_subpicture_render_blend_state(ctx); + gen8_subpic_render_upload_constants(ctx, obj_surface); + i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect); +} + +static void +gen8_render_put_subpicture( + VADriverContextP ctx, + struct object_surface *obj_surface, + const VARectangle *src_rect, + const VARectangle *dst_rect +) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct 
intel_batchbuffer *batch = i965->batch; + unsigned int index = obj_surface->subpic_render_idx; + struct object_subpic *obj_subpic = obj_surface->obj_subpic[index]; + + assert(obj_subpic); + gen8_render_initialize(ctx); + gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect); + gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL); + i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff); + intel_batchbuffer_flush(batch); +} + +static void +gen8_render_terminate(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + dri_bo_unreference(render_state->vb.vertex_buffer); + render_state->vb.vertex_buffer = NULL; + + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + render_state->wm.surface_state_binding_table_bo = NULL; + + if (render_state->instruction_state.bo) { + dri_bo_unreference(render_state->instruction_state.bo); + render_state->instruction_state.bo = NULL; + } + + if (render_state->dynamic_state.bo) { + dri_bo_unreference(render_state->dynamic_state.bo); + render_state->dynamic_state.bo = NULL; + } + + if (render_state->indirect_state.bo) { + dri_bo_unreference(render_state->indirect_state.bo); + render_state->indirect_state.bo = NULL; + } + + if (render_state->draw_region) { + dri_bo_unreference(render_state->draw_region->bo); + free(render_state->draw_region); + render_state->draw_region = NULL; + } +} + +bool +gen8_render_init(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + int i, kernel_size; + unsigned int kernel_offset, end_offset; + unsigned char *kernel_ptr; + struct i965_kernel *kernel; + + render_state->render_put_surface = gen8_render_put_surface; + render_state->render_put_subpicture = gen8_render_put_subpicture; + render_state->render_terminate = gen8_render_terminate; + + memcpy(render_state->render_kernels, 
render_kernels_gen8, + sizeof(render_state->render_kernels)); + + kernel_size = 4096; + + for (i = 0; i < NUM_RENDER_KERNEL; i++) { + kernel = &render_state->render_kernels[i]; + + if (!kernel->size) + continue; + + kernel_size += kernel->size; + } + + render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, + "kernel shader", + kernel_size, + 0x1000); + if (render_state->instruction_state.bo == NULL) { + WARN_ONCE("failure to allocate the buffer space for kernel shader\n"); + return false; + } + + assert(render_state->instruction_state.bo); + + render_state->instruction_state.bo_size = kernel_size; + render_state->instruction_state.end_offset = 0; + end_offset = 0; + + dri_bo_map(render_state->instruction_state.bo, 1); + kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual); + for (i = 0; i < NUM_RENDER_KERNEL; i++) { + kernel = &render_state->render_kernels[i]; + kernel_offset = end_offset; + kernel->kernel_offset = kernel_offset; + + if (!kernel->size) + continue; + + memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size); + + end_offset += ALIGN(kernel->size, ALIGNMENT); + } + + render_state->instruction_state.end_offset = end_offset; + + dri_bo_unmap(render_state->instruction_state.bo); + + return true; +} diff --git a/src/gen8_vme.c b/src/gen8_vme.c new file mode 100644 index 0000000..8cae2a0 --- /dev/null +++ b/src/gen8_vme.c @@ -0,0 +1,1214 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including 
the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + * Xiang Haihao <haihao.xiang@intel.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <assert.h> + +#include "intel_batchbuffer.h" +#include "intel_driver.h" + +#include "i965_defines.h" +#include "i965_drv_video.h" +#include "i965_encoder.h" +#include "gen6_vme.h" +#include "gen6_mfc.h" + +#ifdef SURFACE_STATE_PADDED_SIZE +#undef SURFACE_STATE_PADDED_SIZE +#endif + +#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) +#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) + +#define VME_INTRA_SHADER 0 +#define VME_INTER_SHADER 1 +#define VME_BINTER_SHADER 2 + +#define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */ +#define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */ +#define CURBE_URB_ENTRY_LENGTH 4 /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */ + +#define VME_MSG_LENGTH 32 + +static const uint32_t gen8_vme_intra_frame[][4] = { +#include "shaders/vme/intra_frame_gen8.g8b" +}; + +static const uint32_t gen8_vme_inter_frame[][4] = { +#include "shaders/vme/inter_frame_gen8.g8b" +}; + +static const uint32_t 
gen8_vme_inter_bframe[][4] = { +#include "shaders/vme/inter_bframe_gen8.g8b" +}; + +static struct i965_kernel gen8_vme_kernels[] = { + { + "VME Intra Frame", + VME_INTRA_SHADER, /*index*/ + gen8_vme_intra_frame, + sizeof(gen8_vme_intra_frame), + NULL + }, + { + "VME inter Frame", + VME_INTER_SHADER, + gen8_vme_inter_frame, + sizeof(gen8_vme_inter_frame), + NULL + }, + { + "VME inter BFrame", + VME_BINTER_SHADER, + gen8_vme_inter_bframe, + sizeof(gen8_vme_inter_bframe), + NULL + } +}; + +static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = { +#include "shaders/vme/intra_frame_gen8.g8b" +}; + +static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = { +#include "shaders/vme/mpeg2_inter_gen8.g8b" +}; + +static struct i965_kernel gen8_vme_mpeg2_kernels[] = { + { + "VME Intra Frame", + VME_INTRA_SHADER, /*index*/ + gen8_vme_mpeg2_intra_frame, + sizeof(gen8_vme_mpeg2_intra_frame), + NULL + }, + { + "VME inter Frame", + VME_INTER_SHADER, + gen8_vme_mpeg2_inter_frame, + sizeof(gen8_vme_mpeg2_inter_frame), + NULL + }, +}; + +/* only used for VME source surface state */ +static void +gen8_vme_source_surface_state(VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + vme_context->vme_surface2_setup(ctx, + &vme_context->gpe_context, + obj_surface, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_media_source_surface_state(VADriverContextP ctx, + int index, + struct object_surface *obj_surface, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + vme_context->vme_media_rw_surface_setup(ctx, + &vme_context->gpe_context, + obj_surface, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_media_chroma_source_surface_state(VADriverContextP ctx, + int index, + struct object_surface 
*obj_surface, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + vme_context->vme_media_chroma_surface_setup(ctx, + &vme_context->gpe_context, + obj_surface, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_output_buffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + + vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs; + vme_context->vme_output.pitch = 16; /* in bytes, always 16 */ + + if (is_intra) + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2; + else + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24; + /* + * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref + * + 16 FBR Info + 128 FBR MV + 32 FBR Ref. + * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24. 
+ */ + + vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, + "VME output buffer", + vme_context->vme_output.num_blocks * vme_context->vme_output.size_block, + 0x1000); + assert(vme_context->vme_output.bo); + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &vme_context->vme_output, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + + vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1; + vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */ + vme_context->vme_batchbuffer.pitch = 16; + vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, + "VME batchbuffer", + vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block, + 0x1000); + /* + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &vme_context->vme_batchbuffer, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); + */ +} + +static VAStatus +gen8_vme_surface_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) +{ + struct object_surface *obj_surface; + + /*Setup surfaces state*/ + /* current picture for encoding */ + obj_surface = encode_state->input_yuv_object; + gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context); + gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context); + 
gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context); + + if (!is_intra) { + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int slice_type; + + slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI); + + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen8_vme_source_surface_state); + + if (slice_type == SLICE_TYPE_B) + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen8_vme_source_surface_state); + } + + /* VME output */ + gen8_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context); + gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct gen8_interface_descriptor_data *desc; + int i; + dri_bo *bo; + unsigned char *desc_ptr; + + bo = vme_context->gpe_context.dynamic_state.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset; + + desc = (struct gen8_interface_descriptor_data *)desc_ptr; + + for (i = 0; i < vme_context->vme_kernel_sum; i++) { + struct i965_kernel *kernel; + kernel = &vme_context->gpe_context.kernels[i]; + assert(sizeof(*desc) == 32); + /*Setup the descritor table*/ + memset(desc, 0, sizeof(*desc)); + desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6; + desc->desc3.sampler_count = 0; /* FIXME: */ + desc->desc3.sampler_state_pointer = 0; + desc->desc4.binding_table_entry_count = 1; /* FIXME: */ + desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); + desc->desc5.constant_urb_entry_read_offset = 0; + 
desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH; + + + desc++; + } + + dri_bo_unmap(bo); + + return VA_STATUS_SUCCESS; +} + +static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + unsigned char *constant_buffer; + unsigned int *vme_state_message; + int mv_num = 32; + + vme_state_message = (unsigned int *)vme_context->vme_state_message; + + if (encoder_context->codec == CODEC_H264 || + encoder_context->codec == CODEC_H264_MVC) { + if (vme_context->h264_level >= 30) { + mv_num = 16; + + if (vme_context->h264_level >= 31) + mv_num = 8; + } + } else if (encoder_context->codec == CODEC_MPEG2) { + mv_num = 2; + } + + vme_state_message[31] = mv_num; + + dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1); + assert(vme_context->gpe_context.dynamic_state.bo->virtual); + constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual + + vme_context->gpe_context.curbe_offset; + + /* VME MV/Mb cost table is passed by using const buffer */ + /* Now it uses the fixed search path. So it is constructed directly + * in the GPU shader. 
+ */ + memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128); + + dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo); + + return VA_STATUS_SUCCESS; +} + +#define MB_SCOREBOARD_A (1 << 0) +#define MB_SCOREBOARD_B (1 << 1) +#define MB_SCOREBOARD_C (1 << 2) + +/* check whether the mb of (x_index, y_index) is out of bound */ +static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height) +{ + int mb_index; + if (x_index < 0 || x_index >= mb_width) + return -1; + if (y_index < 0 || y_index >= mb_height) + return -1; + + mb_index = y_index * mb_width + x_index; + if (mb_index < first_mb || mb_index > (first_mb + num_mb)) + return -1; + return 0; +} + +static void +gen8wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + int mb_row; + int s; + unsigned int *command_ptr; + +#define USE_SCOREBOARD (1 << 21) + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = vme_context->vme_batchbuffer.bo->virtual; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + int first_mb = pSliceParameter->macroblock_address; + int num_mb = pSliceParameter->num_macroblocks; + unsigned int mb_intra_ub, score_dep; + int x_outer, y_outer, x_inner, y_inner; + int xtemp_outer = 0; + + x_outer = first_mb % mb_width; + y_outer = first_mb / mb_width; + mb_row = y_outer; + + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + x_inner = x_outer; + y_inner = y_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; 
+ if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != mb_row) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = USE_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + + x_inner -= 2; + y_inner += 1; + } + x_outer += 1; + } + + xtemp_outer = mb_width - 2; + if (xtemp_outer < 0) + xtemp_outer = 0; + x_outer = xtemp_outer; + y_outer = first_mb / mb_width; + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + y_inner = y_outer; + x_inner = x_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != mb_row) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = USE_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data 
*/ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + x_inner -= 2; + y_inner += 1; + } + x_outer++; + if (x_outer >= mb_width) { + y_outer += 1; + x_outer = xtemp_outer; + } + } + } + + *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); +} + +static void +gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + int mb_x = 0, mb_y = 0; + int i, s; + unsigned int *command_ptr; + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = vme_context->vme_batchbuffer.bo->virtual; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + int slice_mb_begin = pSliceParameter->macroblock_address; + int slice_mb_number = pSliceParameter->num_macroblocks; + unsigned int mb_intra_ub; + int slice_mb_x = pSliceParameter->macroblock_address % mb_width; + for (i = 0; i < slice_mb_number; ) { + int mb_count = i + slice_mb_begin; + mb_x = mb_count % mb_width; + mb_y = mb_count / mb_width; + mb_intra_ub = 0; + if (mb_x != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + } + if (mb_y != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + if (mb_x != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + if (mb_x != (mb_width -1)) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + } + if (i < mb_width) { + if (i == 0) + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE); + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK); + if ((i == (mb_width - 1)) && slice_mb_x) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + } + } 
+ + if ((i == mb_width) && slice_mb_x) { + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); + } + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + + /*inline data */ + *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); + *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + i += 1; + } + } + + *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); +} + +static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + gen8_gpe_context_init(ctx, &vme_context->gpe_context); + + /* VME output buffer */ + dri_bo_unreference(vme_context->vme_output.bo); + vme_context->vme_output.bo = NULL; + + dri_bo_unreference(vme_context->vme_batchbuffer.bo); + vme_context->vme_batchbuffer.bo = NULL; + + /* VME state */ + dri_bo_unreference(vme_context->vme_state.bo); + vme_context->vme_state.bo = NULL; +} + +static void gen8_vme_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct intel_batchbuffer *batch = encoder_context->base.batch; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = 
pSequenceParameter->picture_height_in_mbs; + int kernel_shader; + bool allow_hwscore = true; + int s; + unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY); + + if (is_low_quality) + allow_hwscore = false; + else { + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; + if ((pSliceParameter->macroblock_address % width_in_mbs)) { + allow_hwscore = false; + break; + } + } + } + + if ((pSliceParameter->slice_type == SLICE_TYPE_I) || + (pSliceParameter->slice_type == SLICE_TYPE_I)) { + kernel_shader = VME_INTRA_SHADER; + } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) || + (pSliceParameter->slice_type == SLICE_TYPE_SP)) { + kernel_shader = VME_INTER_SHADER; + } else { + kernel_shader = VME_BINTER_SHADER; + if (!allow_hwscore) + kernel_shader = VME_INTER_SHADER; + } + if (allow_hwscore) + gen8wa_vme_walker_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); + else + gen8_vme_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); + + intel_batchbuffer_start_atomic(batch, 0x1000); + gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); + BEGIN_BATCH(batch, 3); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_RELOC(batch, + vme_context->vme_batchbuffer.bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + intel_batchbuffer_end_atomic(batch); +} + +static VAStatus gen8_vme_prepare(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + VAStatus vaStatus = VA_STATUS_SUCCESS; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 
*)encode_state->slice_params_ext[0]->buffer; + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + if (!vme_context->h264_level || + (vme_context->h264_level != pSequenceParameter->level_idc)) { + vme_context->h264_level = pSequenceParameter->level_idc; + } + + intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); + + /*Setup all the memory object*/ + gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); + gen8_vme_interface_setup(ctx, encode_state, encoder_context); + //gen8_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context); + gen8_vme_constant_setup(ctx, encode_state, encoder_context); + + /*Programing media pipeline*/ + gen8_vme_pipeline_programing(ctx, encode_state, encoder_context); + + return vaStatus; +} + +static VAStatus gen8_vme_run(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + + intel_batchbuffer_flush(batch); + + return VA_STATUS_SUCCESS; +} + +static VAStatus gen8_vme_stop(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + return VA_STATUS_SUCCESS; +} + +static VAStatus +gen8_vme_pipeline(VADriverContextP ctx, + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_vme_media_init(ctx, encoder_context); + gen8_vme_prepare(ctx, encode_state, encoder_context); + gen8_vme_run(ctx, encode_state, encoder_context); + gen8_vme_stop(ctx, encode_state, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static void +gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + int is_intra, + struct 
intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + + vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs; + vme_context->vme_output.pitch = 16; /* in bytes, always 16 */ + + if (is_intra) + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2; + else + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24; + /* + * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref + * + 16 FBR Info + 128 FBR MV + 32 FBR Ref. + * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24. + */ + + vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, + "VME output buffer", + vme_context->vme_output.num_blocks * vme_context->vme_output.size_block, + 0x1000); + assert(vme_context->vme_output.bo); + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &vme_context->vme_output, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static void +gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + + vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1; + vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */ + 
vme_context->vme_batchbuffer.pitch = 16; + vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, + "VME batchbuffer", + vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block, + 0x1000); + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &vme_context->vme_batchbuffer, + BINDING_TABLE_OFFSET(index), + SURFACE_STATE_OFFSET(index)); +} + +static VAStatus +gen8_vme_mpeg2_surface_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) +{ + struct object_surface *obj_surface; + + /*Setup surfaces state*/ + /* current picture for encoding */ + obj_surface = encode_state->input_yuv_object; + gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context); + gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context); + gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context); + + if (!is_intra) { + /* reference 0 */ + obj_surface = encode_state->reference_objects[0]; + + if (obj_surface->bo != NULL) + gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context); + + /* reference 1 */ + obj_surface = encode_state->reference_objects[1]; + + if (obj_surface && obj_surface->bo != NULL) + gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context); + } + + /* VME output */ + gen8_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context); + gen8_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static void +gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + unsigned int *command_ptr; + +#define MPEG2_SCOREBOARD (1 << 21) + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + 
command_ptr = vme_context->vme_batchbuffer.bo->virtual; + + { + unsigned int mb_intra_ub, score_dep; + int x_outer, y_outer, x_inner, y_inner; + int xtemp_outer = 0; + int first_mb = 0; + int num_mb = mb_width * mb_height; + + x_outer = 0; + y_outer = 0; + + + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + x_inner = x_outer; + y_inner = y_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + score_dep |= MB_SCOREBOARD_B; + + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = MPEG2_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + + x_inner -= 2; + y_inner += 1; + } + x_outer += 1; + } + + xtemp_outer = mb_width - 2; + if (xtemp_outer < 0) + xtemp_outer = 0; + x_outer = xtemp_outer; + y_outer = 0; + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { + y_inner = y_outer; + x_inner = x_outer; + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { + mb_intra_ub = 0; + score_dep = 0; + if (x_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + score_dep |= MB_SCOREBOARD_A; + } + if (y_inner != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + 
score_dep |= MB_SCOREBOARD_B; + + if (x_inner != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (x_inner != (mb_width -1)) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + score_dep |= MB_SCOREBOARD_C; + } + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = MPEG2_SCOREBOARD; + /* Indirect data */ + *command_ptr++ = 0; + /* the (X, Y) term of scoreboard */ + *command_ptr++ = ((y_inner << 16) | x_inner); + *command_ptr++ = score_dep; + /*inline data */ + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + x_inner -= 2; + y_inner += 1; + } + x_outer++; + if (x_outer >= mb_width) { + y_outer += 1; + x_outer = xtemp_outer; + } + } + } + + *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); + return; +} + +static void +gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + int mb_x = 0, mb_y = 0; + int i, s, j; + unsigned int *command_ptr; + + + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = vme_context->vme_batchbuffer.bo->virtual; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer; + + for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { + int slice_mb_begin = slice_param->macroblock_address; + int slice_mb_number = slice_param->num_macroblocks; + unsigned int mb_intra_ub; + + for (i = 0; i < slice_mb_number;) { + int mb_count = i + slice_mb_begin; + + mb_x = mb_count % mb_width; + mb_y = mb_count / mb_width; + 
mb_intra_ub = 0; + + if (mb_x != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; + } + + if (mb_y != 0) { + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; + + if (mb_x != 0) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; + + if (mb_x != (mb_width -1)) + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; + } + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + + /*inline data */ + *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); + *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; + *command_ptr++ = 0; + i += 1; + } + + slice_param++; + } + } + + *command_ptr++ = MI_BATCH_BUFFER_END; + *command_ptr++ = 0; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); +} + +static void +gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + int is_intra, + struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct intel_batchbuffer *batch = encoder_context->base.batch; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; + bool allow_hwscore = true; + int s; + int kernel_shader; + VAEncPictureParameterBufferMPEG2 *pic_param = NULL; + + for (s = 0; s < encode_state->num_slice_params_ext; s++) { + int j; + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer; + + for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) { + if (slice_param->macroblock_address % width_in_mbs) { + allow_hwscore = false; + break; + } + } + } + + pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; + if 
(pic_param->picture_type == VAEncPictureTypeIntra) { + allow_hwscore = false; + kernel_shader = VME_INTRA_SHADER; + } else { + kernel_shader = VME_INTER_SHADER; + } + + if (allow_hwscore) + gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + kernel_shader, + encoder_context); + else + gen8_vme_mpeg2_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, + 0, + encoder_context); + + intel_batchbuffer_start_atomic(batch, 0x1000); + gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_RELOC(batch, + vme_context->vme_batchbuffer.bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); + + intel_batchbuffer_end_atomic(batch); +} + +static VAStatus +gen8_vme_mpeg2_prepare(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + VAStatus vaStatus = VA_STATUS_SUCCESS; + VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; + VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + if ((!vme_context->mpeg2_level) || + (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) { + vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK; + } + + + /*Setup all the memory object*/ + gen8_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + gen8_vme_interface_setup(ctx, encode_state, encoder_context); + //gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + 
intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context); + gen8_vme_constant_setup(ctx, encode_state, encoder_context); + + /*Programing media pipeline*/ + gen8_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context); + + return vaStatus; +} + +static VAStatus +gen8_vme_mpeg2_pipeline(VADriverContextP ctx, + VAProfile profile, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen8_vme_media_init(ctx, encoder_context); + gen8_vme_mpeg2_prepare(ctx, encode_state, encoder_context); + gen8_vme_run(ctx, encode_state, encoder_context); + gen8_vme_stop(ctx, encode_state, encoder_context); + + return VA_STATUS_SUCCESS; +} + +static void +gen8_vme_context_destroy(void *context) +{ + struct gen6_vme_context *vme_context = context; + + gen8_gpe_context_destroy(&vme_context->gpe_context); + + dri_bo_unreference(vme_context->vme_output.bo); + vme_context->vme_output.bo = NULL; + + dri_bo_unreference(vme_context->vme_state.bo); + vme_context->vme_state.bo = NULL; + + dri_bo_unreference(vme_context->vme_batchbuffer.bo); + vme_context->vme_batchbuffer.bo = NULL; + + if (vme_context->vme_state_message) { + free(vme_context->vme_state_message); + vme_context->vme_state_message = NULL; + } + + free(vme_context); +} + +Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context)); + struct i965_kernel *vme_kernel_list = NULL; + int i965_kernel_num; + + switch (encoder_context->codec) { + case CODEC_H264: + case CODEC_H264_MVC: + vme_kernel_list = gen8_vme_kernels; + encoder_context->vme_pipeline = gen8_vme_pipeline; + i965_kernel_num = sizeof(gen8_vme_kernels) / sizeof(struct i965_kernel); + break; + + case CODEC_MPEG2: + vme_kernel_list = gen8_vme_mpeg2_kernels; + encoder_context->vme_pipeline = gen8_vme_mpeg2_pipeline; + i965_kernel_num = sizeof(gen8_vme_mpeg2_kernels) / 
sizeof(struct i965_kernel); + + break; + + default: + /* never get here */ + assert(0); + + break; + } + vme_context->vme_kernel_sum = i965_kernel_num; + vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; + + vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6; + vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH; + vme_context->gpe_context.sampler_size = 0; + + + vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + vme_context->gpe_context.vfe_state.num_urb_entries = 64; + vme_context->gpe_context.vfe_state.gpgpu_mode = 0; + vme_context->gpe_context.vfe_state.urb_entry_size = 16; + vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1; + + gen7_vme_scoreboard_init(ctx, vme_context); + + gen8_gpe_load_kernels(ctx, + &vme_context->gpe_context, + vme_kernel_list, + i965_kernel_num); + vme_context->vme_surface2_setup = gen8_gpe_surface2_setup; + vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup; + vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup; + vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup; + + encoder_context->vme_context = vme_context; + encoder_context->vme_context_destroy = gen8_vme_context_destroy; + + vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int)); + + return True; +} diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c index 0a5b89b..944a608 100644 --- a/src/i965_avc_bsd.c +++ b/src/i965_avc_bsd.c @@ -51,6 +51,7 @@ i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx, if (!avc_bsd_surface) { avc_bsd_surface = calloc(sizeof(GenAvcSurface), 1); + avc_bsd_surface->frame_store_id = -1; assert((obj_surface->size & 0x3f) == 0); obj_surface->private_data = avc_bsd_surface; } @@ -388,7 +389,7 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, { struct 
intel_batchbuffer *batch = i965_h264_context->batch; struct i965_avc_bsd_context *i965_avc_bsd_context; - int i, j; + int i; VAPictureH264 *va_pic; struct object_surface *obj_surface; GenAvcSurface *avc_bsd_surface; @@ -418,24 +419,8 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) { - if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID && - i965_h264_context->fsid_list[i].obj_surface && - i965_h264_context->fsid_list[i].obj_surface->private_data) { - int found = 0; - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) { - found = 1; - break; - } - } - - assert(found == 1); - obj_surface = i965_h264_context->fsid_list[i].obj_surface; + obj_surface = i965_h264_context->fsid_list[i].obj_surface; + if (obj_surface && obj_surface->private_data) { avc_bsd_surface = obj_surface->private_data; OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top, @@ -458,9 +443,11 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, va_pic = &pic_param->CurrPic; obj_surface = decode_state->render_object; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? 
SURFACE_REFERENCED : 0); - i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + if (pic_param->pic_fields.bits.reference_pic_flag) + obj_surface->flags |= SURFACE_REFERENCED; + else + obj_surface->flags &= ~SURFACE_REFERENCED; + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420); /* initial uv component for YUV400 case */ if (pic_param->seq_fields.bits.chroma_format_idc == 0) { @@ -490,26 +477,16 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, /* POC List */ for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) { - if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) { - int found = 0; - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) { - found = 1; - break; - } - } + obj_surface = i965_h264_context->fsid_list[i].obj_surface; - assert(found == 1); + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); - if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) { - OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); - OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); - } + assert(va_pic != NULL); + OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt); + OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt); } else { OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); @@ -803,7 +780,7 @@ i965_avc_bsd_object(VADriverContextP ctx, { struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) ironlake_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context); else g4x_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context); @@ -941,8 +918,8 
@@ i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, v assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, i965_h264_context->fsid_list); + intel_update_avc_frame_store_index(ctx, decode_state, pic_param, + i965_h264_context->fsid_list, &i965_h264_context->fs_ctx); i965_weight128_workaround(ctx,decode_state, h264_context); i965_h264_context->enable_avc_ildb = 0; diff --git a/src/i965_avc_hw_scoreboard.c b/src/i965_avc_hw_scoreboard.c index b17ea83..f866599 100644 --- a/src/i965_avc_hw_scoreboard.c +++ b/src/i965_avc_hw_scoreboard.c @@ -217,7 +217,7 @@ i965_avc_hw_scoreboard_urb_layout(VADriverContextP ctx, struct i965_h264_context unsigned int vfe_fence, cs_fence; vfe_fence = avc_hw_scoreboard_context->urb.cs_start; - cs_fence = URB_SIZE((&i965->intel)); + cs_fence = i965->intel.device_info->urb_size; BEGIN_BATCH(batch, 3); OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); @@ -429,7 +429,7 @@ i965_avc_hw_scoreboard_decode_init(VADriverContextP ctx, void *h264_context) avc_hw_scoreboard_context->urb.cs_start = avc_hw_scoreboard_context->urb.vfe_start + avc_hw_scoreboard_context->urb.num_vfe_entries * avc_hw_scoreboard_context->urb.size_vfe_entry; assert(avc_hw_scoreboard_context->urb.cs_start + - avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); } } diff --git a/src/i965_avc_ildb.c b/src/i965_avc_ildb.c index 62d599e..10292e0 100644 --- a/src/i965_avc_ildb.c +++ b/src/i965_avc_ildb.c @@ -342,7 +342,7 @@ i965_avc_ildb_upload_constants(VADriverContextP ctx, assert(avc_ildb_context->curbe.bo->virtual); root_input = avc_ildb_context->curbe.bo->virtual; - if 
(IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { root_input->max_concurrent_threads = 76; /* 72 - 2 + 8 - 2 */ } else { root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 - 2 */ @@ -410,7 +410,7 @@ i965_avc_ildb_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h2 unsigned int vfe_fence, cs_fence; vfe_fence = avc_ildb_context->urb.cs_start; - cs_fence = URB_SIZE((&i965->intel)); + cs_fence = i965->intel.device_info->urb_size; BEGIN_BATCH(batch, 3); OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); @@ -427,7 +427,7 @@ i965_avc_ildb_state_base_address(VADriverContextP ctx, struct i965_h264_context struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965_h264_context->batch; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 8); OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); @@ -596,8 +596,8 @@ i965_avc_ildb_decode_init(VADriverContextP ctx, void *h264_context) avc_ildb_context->urb.vfe_start = 0; avc_ildb_context->urb.cs_start = avc_ildb_context->urb.vfe_start + avc_ildb_context->urb.num_vfe_entries * avc_ildb_context->urb.size_vfe_entry; - // assert(avc_ildb_context->urb.cs_start + - // avc_ildb_context->urb.num_cs_entries * avc_ildb_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + // assert(avc_ildb_context->urb.cs_start + + // avc_ildb_context->urb.num_cs_entries * avc_ildb_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); for (i = 0; i < NUM_AVC_ILDB_SURFACES; i++) { dri_bo_unreference(avc_ildb_context->surface[i].s_bo); @@ -614,7 +614,7 @@ i965_avc_ildb_decode_init(VADriverContextP ctx, void *h264_context) /* kernel offset */ assert(NUM_AVC_ILDB_INTERFACES == ARRAY_ELEMS(avc_ildb_kernel_offset_gen5)); - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { avc_ildb_kernel_offset = 
avc_ildb_kernel_offset_gen5; } else { avc_ildb_kernel_offset = avc_ildb_kernel_offset_gen4; diff --git a/src/i965_decoder.h b/src/i965_decoder.h index 4f7d2cc..14d4d0c 100644 --- a/src/i965_decoder.h +++ b/src/i965_decoder.h @@ -29,6 +29,7 @@ #include <stdlib.h> #include <va/va.h> +#include <va/va_dec_vp8.h> #include <intel_bufmgr.h> #define MAX_GEN_REFERENCE_FRAMES 16 @@ -38,6 +39,21 @@ struct gen_frame_store { VASurfaceID surface_id; int frame_store_id; struct object_surface *obj_surface; + + /* This represents the time when this frame store was last used to + hold a reference frame. This is not connected to a presentation + timestamp (PTS), and this is not a common decoding time stamp + (DTS) either. It serves the purpose of tracking retired + reference frame candidates. + + This is only used for H.264 decoding on platforms before Haswell */ + uint64_t ref_age; +}; + +typedef struct gen_frame_store_context GenFrameStoreContext; +struct gen_frame_store_context { + uint64_t age; + int prev_poc; }; typedef struct gen_buffer GenBuffer; @@ -49,4 +65,6 @@ struct gen_buffer { struct hw_context * gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config); +extern struct hw_context * +gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config); #endif /* I965_DECODER_H */ diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 4ef09b5..8b546db 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -22,10 +22,11 @@ */ #include "sysdeps.h" - +#include <limits.h> #include <alloca.h> #include "intel_batchbuffer.h" +#include "intel_media.h" #include "i965_drv_video.h" #include "i965_decoder_utils.h" #include "i965_defines.h" @@ -139,7 +140,7 @@ mpeg2_set_reference_surfaces( ref_frames[n++].surface_id = ref_frames[0].surface_id; } - if (pic_param->picture_coding_extension.bits.progressive_frame) + if (pic_param->picture_coding_extension.bits.frame_pred_frame_dct) return; ref_frames[2].surface_id = 
VA_INVALID_ID; @@ -174,6 +175,75 @@ mpeg2_set_reference_surfaces( } } +/* Ensure the supplied VA surface has valid storage for decoding the + current picture */ +VAStatus +avc_ensure_surface_bo( + VADriverContextP ctx, + struct decode_state *decode_state, + struct object_surface *obj_surface, + const VAPictureParameterBufferH264 *pic_param +) +{ + VAStatus va_status; + uint32_t hw_fourcc, fourcc, subsample, chroma_format; + + /* Validate chroma format */ + switch (pic_param->seq_fields.bits.chroma_format_idc) { + case 0: // Grayscale + fourcc = VA_FOURCC_Y800; + subsample = SUBSAMPLE_YUV400; + chroma_format = VA_RT_FORMAT_YUV400; + break; + case 1: // YUV 4:2:0 + fourcc = VA_FOURCC_NV12; + subsample = SUBSAMPLE_YUV420; + chroma_format = VA_RT_FORMAT_YUV420; + break; + default: + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; + } + + /* Determine the HW surface format, bound to VA config needs */ + if ((decode_state->base.chroma_formats & chroma_format) == chroma_format) + hw_fourcc = fourcc; + else { + hw_fourcc = 0; + switch (fourcc) { + case VA_FOURCC_Y800: // Implement with an NV12 surface + if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) { + hw_fourcc = VA_FOURCC_NV12; + subsample = SUBSAMPLE_YUV420; + } + break; + } + } + if (!hw_fourcc) + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; + + /* (Re-)allocate the underlying surface buffer store, if necessary */ + if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) { + struct i965_driver_data * const i965 = i965_driver_data(ctx); + + i965_destroy_surface_storage(obj_surface); + va_status = i965_check_alloc_surface_bo(ctx, obj_surface, + i965->codec_info->has_tiled_surface, hw_fourcc, subsample); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + } + + /* Fake chroma components if grayscale is implemented on top of NV12 */ + if (fourcc == VA_FOURCC_Y800 && hw_fourcc == VA_FOURCC_NV12) { + const uint32_t uv_offset = obj_surface->width * obj_surface->height; + const uint32_t uv_size = 
obj_surface->width * obj_surface->height / 2; + + drm_intel_gem_bo_map_gtt(obj_surface->bo); + memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); + drm_intel_gem_bo_unmap_gtt(obj_surface->bo); + } + return VA_STATUS_SUCCESS; +} + /* Generate flat scaling matrices for H.264 decoding */ void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix) @@ -185,6 +255,56 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix) memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8)); } +/* Returns the POC of the supplied VA picture */ +static int +avc_get_picture_poc(const VAPictureH264 *va_pic) +{ + int structure, field_poc[2]; + + structure = va_pic->flags & + (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD); + field_poc[0] = structure != VA_PICTURE_H264_BOTTOM_FIELD ? + va_pic->TopFieldOrderCnt : INT_MAX; + field_poc[1] = structure != VA_PICTURE_H264_TOP_FIELD ? + va_pic->BottomFieldOrderCnt : INT_MAX; + return MIN(field_poc[0], field_poc[1]); +} + +/* Returns a unique picture ID that represents the supplied VA surface object */ +int +avc_get_picture_id(struct object_surface *obj_surface) +{ + int pic_id; + + /* This highly depends on how the internal VA objects are organized. + + Theory of operations: + The VA objects are maintained in heaps so that any released VA + surface will become free again for future allocation. This means + that holes in there are filled in for subsequent allocations. + So, this ultimately means that we could just use the Heap ID of + the VA surface as the resulting picture ID (16 bits) */ + pic_id = 1 + (obj_surface->base.id & OBJECT_HEAP_ID_MASK); + return (pic_id <= 0xffff) ? 
pic_id : -1; +} + +/* Finds the VA/H264 picture associated with the specified VA surface id */ +VAPictureH264 * +avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count) +{ + int i; + + if (id != VA_INVALID_ID) { + for (i = 0; i < pic_list_count; i++) { + VAPictureH264 * const va_pic = &pic_list[i]; + if (va_pic->picture_id == id && + !(va_pic->flags & VA_PICTURE_H264_INVALID)) + return va_pic; + } + } + return NULL; +} + /* Get first macroblock bit offset for BSD, minus EPB count (AVC) */ /* XXX: slice_data_bit_offset does not account for EPB */ unsigned int @@ -245,8 +365,24 @@ avc_get_first_mb_bit_offset_with_epb( static inline uint8_t get_ref_idx_state_1(const VAPictureH264 *va_pic, unsigned int frame_store_id) { + /* The H.264 standard, and the VA-API specification, allows for at + least 3 states for a picture: "used for short-term reference", + "used for long-term reference", or considered as not used for + reference. + + The latter is used in the MVC inter prediction and inter-view + prediction process (H.8.4). This has an incidence on the + colZeroFlag variable, as defined in 8.4.1.2. 
+ + Since it is not possible to directly program that flag, let's + make the hardware derive this value by assimilating "considered + as not used for reference" to a "not used for short-term + reference", and subsequently making it "used for long-term + reference" to fit the definition of Bit6 here */ + const unsigned int ref_flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE | + VA_PICTURE_H264_LONG_TERM_REFERENCE; const unsigned int is_long_term = - !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE); + ((va_pic->flags & ref_flags) != VA_PICTURE_H264_SHORT_TERM_REFERENCE); const unsigned int is_top_field = !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD); const unsigned int is_bottom_field = @@ -267,28 +403,35 @@ gen5_fill_avc_ref_idx_state( const GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] ) { - unsigned int i, n, frame_idx; + int i, j; - for (i = 0, n = 0; i < ref_list_count; i++) { + for (i = 0; i < ref_list_count; i++) { const VAPictureH264 * const va_pic = &ref_list[i]; - if (va_pic->flags & VA_PICTURE_H264_INVALID) + if ((va_pic->flags & VA_PICTURE_H264_INVALID) || + va_pic->picture_id == VA_INVALID_ID) { + state[i] = 0xff; continue; + } - for (frame_idx = 0; frame_idx < MAX_GEN_REFERENCE_FRAMES; frame_idx++) { - const GenFrameStore * const fs = &frame_store[frame_idx]; - if (fs->surface_id != VA_INVALID_ID && - fs->surface_id == va_pic->picture_id) { - assert(frame_idx == fs->frame_store_id); + for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { + if (frame_store[j].surface_id == va_pic->picture_id) break; - } } - assert(frame_idx < MAX_GEN_REFERENCE_FRAMES); - state[n++] = get_ref_idx_state_1(va_pic, frame_idx); + + if (j != MAX_GEN_REFERENCE_FRAMES) { // Found picture in the Frame Store + const GenFrameStore * const fs = &frame_store[j]; + assert(fs->frame_store_id == j); // Current architecture/assumption + state[i] = get_ref_idx_state_1(va_pic, fs->frame_store_id); + } + else { + WARN_ONCE("Invalid RefPicListX[] entry!!! 
It is not included in DPB\n"); + state[i] = get_ref_idx_state_1(va_pic, 0) | 0x80; + } } - for (; n < 32; n++) - state[n] = 0xff; + for (; i < 32; i++) + state[i] = 0xff; } /* Emit Reference List Entries (Gen6+: SNB, IVB) */ @@ -344,138 +487,249 @@ gen6_send_avc_ref_idx_state( ); } -void -intel_update_avc_frame_store_index(VADriverContextP ctx, - struct decode_state *decode_state, - VAPictureParameterBufferH264 *pic_param, - GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]) +static void +gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct intel_batchbuffer *batch) { - int i, j; + int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; + int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */ + int slice_hor_pos, slice_ver_pos, slice_start_mb_num, next_slice_hor_pos, next_slice_ver_pos; + int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag && + pic_param->seq_fields.bits.mb_adaptive_frame_field_flag); + + if (next_slice_param) { + int first_mb_in_next_slice; + + slice_hor_pos = 0; + slice_ver_pos = 0; + slice_start_mb_num = 0; + first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture; + next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; + next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs; + } else { + slice_hor_pos = 0; + slice_ver_pos = height_in_mbs; + slice_start_mb_num = width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag); + next_slice_hor_pos = 0; + next_slice_ver_pos = 0; + } - assert(MAX_GEN_REFERENCE_FRAMES == ARRAY_ELEMS(pic_param->ReferenceFrames)); + BEGIN_BCS_BATCH(batch, 11); + OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + slice_ver_pos << 24 | + slice_hor_pos << 16 | + slice_start_mb_num << 0); + OUT_BCS_BATCH(batch, + 
next_slice_ver_pos << 16 | + next_slice_hor_pos << 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} - for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { - int found = 0; +static void +gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + struct intel_batchbuffer *batch) +{ - if (frame_store[i].surface_id == VA_INVALID_ID || - frame_store[i].obj_surface == NULL) - continue; + BEGIN_BCS_BATCH(batch, 6); + OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); +} - assert(frame_store[i].frame_store_id != -1); +void +gen6_mfd_avc_phantom_slice(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct intel_batchbuffer *batch) +{ + gen6_mfd_avc_phantom_slice_state(ctx, pic_param, next_slice_param, batch); + gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, batch); +} - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { - VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j]; - if (ref_pic->flags & VA_PICTURE_H264_INVALID) - continue; +/* Comparison function for sorting out the array of free frame store entries */ +static int +compare_avc_ref_store_func(const void *p1, const void *p2) +{ + const GenFrameStore * const fs1 = *((GenFrameStore **)p1); + const GenFrameStore * const fs2 = *((GenFrameStore **)p2); - if (frame_store[i].surface_id == ref_pic->picture_id) { - found = 1; - break; - } - } + return fs1->ref_age - fs2->ref_age; +} - /* remove it from the internal DPB */ - if (!found) { - struct object_surface *obj_surface = frame_store[i].obj_surface; - - obj_surface->flags &= ~SURFACE_REFERENCED; +void +intel_update_avc_frame_store_index( + VADriverContextP ctx, + struct 
decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES], + GenFrameStoreContext *fs_ctx +) +{ + GenFrameStore *free_refs[MAX_GEN_REFERENCE_FRAMES]; + uint32_t used_refs = 0, add_refs = 0; + uint64_t age; + int i, n, num_free_refs; + + /* Detect changes of access unit */ + const int poc = avc_get_picture_poc(&pic_param->CurrPic); + if (fs_ctx->age == 0 || fs_ctx->prev_poc != poc) + fs_ctx->age++; + fs_ctx->prev_poc = poc; + age = fs_ctx->age; + + /* Tag entries that are still available in our Frame Store */ + for (i = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) { + struct object_surface * const obj_surface = + decode_state->reference_objects[i]; + if (!obj_surface) + continue; - if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) { - dri_bo_unreference(obj_surface->bo); - obj_surface->bo = NULL; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; + GenAvcSurface * const avc_surface = obj_surface->private_data; + if (!avc_surface) + continue; + if (avc_surface->frame_store_id >= 0) { + GenFrameStore * const fs = + &frame_store[avc_surface->frame_store_id]; + if (fs->surface_id == obj_surface->base.id) { + fs->obj_surface = obj_surface; + fs->ref_age = age; + used_refs |= 1 << fs->frame_store_id; + continue; } - - if (obj_surface->free_private_data) - obj_surface->free_private_data(&obj_surface->private_data); - - frame_store[i].surface_id = VA_INVALID_ID; - frame_store[i].frame_store_id = -1; - frame_store[i].obj_surface = NULL; } + add_refs |= 1 << i; } - for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { - VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i]; - int found = 0; - - if (ref_pic->flags & VA_PICTURE_H264_INVALID || - ref_pic->picture_id == VA_INVALID_SURFACE || - decode_state->reference_objects[i] == NULL) + /* Build and sort out the list of retired candidates. 
The resulting + list is ordered by increasing age when they were last used */ + for (i = 0, n = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + if (!(used_refs & (1 << i))) { + GenFrameStore * const fs = &frame_store[i]; + fs->obj_surface = NULL; + free_refs[n++] = fs; + } + } + num_free_refs = n; + qsort(&free_refs[0], n, sizeof(free_refs[0]), compare_avc_ref_store_func); + + /* Append the new reference frames */ + for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) { + struct object_surface * const obj_surface = + decode_state->reference_objects[i]; + if (!obj_surface || !(add_refs & (1 << i))) continue; - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { - if (frame_store[j].surface_id == ref_pic->picture_id) { - found = 1; - break; - } + GenAvcSurface * const avc_surface = obj_surface->private_data; + if (!avc_surface) + continue; + if (n < num_free_refs) { + GenFrameStore * const fs = free_refs[n++]; + fs->surface_id = obj_surface->base.id; + fs->obj_surface = obj_surface; + fs->frame_store_id = fs - frame_store; + fs->ref_age = age; + avc_surface->frame_store_id = fs->frame_store_id; + continue; } + WARN_ONCE("No free slot found for DPB reference list!!!\n"); + } +} - /* add the new reference frame into the internal DPB */ - if (!found) { - int frame_idx; - struct object_surface *obj_surface = decode_state->reference_objects[i]; +void +gen75_update_avc_frame_store_index( + VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) +{ + int i, n; + + /* Construct the Frame Store array, in compact form. i.e. empty or + invalid entries are discarded. 
*/ + for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) { + struct object_surface * const obj_surface = + decode_state->reference_objects[i]; + if (!obj_surface) + continue; - /* - * Sometimes a dummy frame comes from the upper layer library, call i965_check_alloc_surface_bo() - * to ake sure the store buffer is allocated for this reference frame - */ - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + GenFrameStore * const fs = &frame_store[n]; + fs->surface_id = obj_surface->base.id; + fs->obj_surface = obj_surface; + fs->frame_store_id = n++; + } - /* Find a free frame store index */ - for (frame_idx = 0; frame_idx < MAX_GEN_REFERENCE_FRAMES; frame_idx++) { - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { - if (frame_store[j].surface_id == VA_INVALID_ID || - frame_store[j].obj_surface == NULL) - continue; + /* Any remaining entry is marked as invalid */ + for (; n < MAX_GEN_REFERENCE_FRAMES; n++) { + GenFrameStore * const fs = &frame_store[n]; + fs->surface_id = VA_INVALID_ID; + fs->obj_surface = NULL; + fs->frame_store_id = -1; + } +} - if (frame_store[j].frame_store_id == frame_idx) /* the store index is in use */ - break; - } +bool +gen75_fill_avc_picid_list( + uint16_t pic_ids[16], + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) +{ + int i, pic_id; - if (j == MAX_GEN_REFERENCE_FRAMES) - break; - } + /* Fill in with known picture IDs. The Frame Store array is in + compact form, i.e. 
empty entries are only to be found at the + end of the array: there are no holes in the set of active + reference frames */ + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + GenFrameStore * const fs = &frame_store[i]; + if (!fs->obj_surface) + break; + pic_id = avc_get_picture_id(fs->obj_surface); + if (pic_id < 0) + return false; + pic_ids[i] = pic_id; + } - assert(frame_idx < MAX_GEN_REFERENCE_FRAMES); + /* When an element of the list is not relevant the value of the + picture ID shall be set to 0 */ + for (; i < MAX_GEN_REFERENCE_FRAMES; i++) + pic_ids[i] = 0; + return true; +} - for (j = 0; j < MAX_GEN_REFERENCE_FRAMES; j++) { - if (frame_store[j].surface_id == VA_INVALID_ID || - frame_store[j].obj_surface == NULL) { - frame_store[j].surface_id = ref_pic->picture_id; - frame_store[j].frame_store_id = frame_idx; - frame_store[j].obj_surface = obj_surface; - break; - } - } - } - } +bool +gen75_send_avc_picid_state( + struct intel_batchbuffer *batch, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) +{ + uint16_t pic_ids[16]; - /* sort */ - for (i = 0; i < MAX_GEN_REFERENCE_FRAMES - 1; i++) { - if (frame_store[i].surface_id != VA_INVALID_ID && - frame_store[i].obj_surface != NULL && - frame_store[i].frame_store_id == i) - continue; + if (!gen75_fill_avc_picid_list(pic_ids, frame_store)) + return false; - for (j = i + 1; j < MAX_GEN_REFERENCE_FRAMES; j++) { - if (frame_store[j].surface_id != VA_INVALID_ID && - frame_store[j].obj_surface != NULL && - frame_store[j].frame_store_id == i) { - VASurfaceID id = frame_store[i].surface_id; - int frame_idx = frame_store[i].frame_store_id; - struct object_surface *obj_surface = frame_store[i].obj_surface; - - frame_store[i].surface_id = frame_store[j].surface_id; - frame_store[i].frame_store_id = frame_store[j].frame_store_id; - frame_store[i].obj_surface = frame_store[j].obj_surface; - frame_store[j].surface_id = id; - frame_store[j].frame_store_id = frame_idx; - frame_store[j].obj_surface = obj_surface; - break; 
- } - } - } + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2)); + OUT_BCS_BATCH(batch, 0); // enable Picture ID Remapping + intel_batchbuffer_data(batch, pic_ids, sizeof(pic_ids)); + ADVANCE_BCS_BATCH(batch); + return true; } void @@ -517,14 +771,70 @@ intel_update_vc1_frame_store_index(VADriverContextP ctx, } +void +intel_update_vp8_frame_store_index(VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferVP8 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]) +{ + struct object_surface *obj_surface; + int i; + + obj_surface = decode_state->reference_objects[0]; + + if (pic_param->last_ref_frame == VA_INVALID_ID || + !obj_surface || + !obj_surface->bo) { + frame_store[0].surface_id = VA_INVALID_ID; + frame_store[0].obj_surface = NULL; + } else { + frame_store[0].surface_id = pic_param->last_ref_frame; + frame_store[0].obj_surface = obj_surface; + } + + obj_surface = decode_state->reference_objects[1]; + + if (pic_param->golden_ref_frame == VA_INVALID_ID || + !obj_surface || + !obj_surface->bo) { + frame_store[1].surface_id = frame_store[0].surface_id; + frame_store[1].obj_surface = frame_store[0].obj_surface; + } else { + frame_store[1].surface_id = pic_param->golden_ref_frame; + frame_store[1].obj_surface = obj_surface; + } + + obj_surface = decode_state->reference_objects[2]; + + if (pic_param->alt_ref_frame == VA_INVALID_ID || + !obj_surface || + !obj_surface->bo) { + frame_store[2].surface_id = frame_store[0].surface_id; + frame_store[2].obj_surface = frame_store[0].obj_surface; + } else { + frame_store[2].surface_id = pic_param->alt_ref_frame; + frame_store[2].obj_surface = obj_surface; + } + + for (i = 3; i < MAX_GEN_REFERENCE_FRAMES; i++) { + frame_store[i].surface_id = frame_store[i % 2].surface_id; + frame_store[i].obj_surface = frame_store[i % 2].obj_surface; + } + +} + static VAStatus intel_decoder_check_avc_parameter(VADriverContextP ctx, + VAProfile h264_profile, struct 
decode_state *decode_state) { struct i965_driver_data *i965 = i965_driver_data(ctx); VAPictureParameterBufferH264 *pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; + VAStatus va_status; struct object_surface *obj_surface; int i; + VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param; + int j; assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID)); assert(pic_param->CurrPic.picture_id != VA_INVALID_SURFACE); @@ -538,27 +848,71 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx, if (pic_param->CurrPic.picture_id != decode_state->current_render_target) goto error; - for (i = 0; i < 16; i++) { - if (pic_param->ReferenceFrames[i].flags & VA_PICTURE_H264_INVALID || - pic_param->ReferenceFrames[i].picture_id == VA_INVALID_SURFACE) - break; - else { - obj_surface = SURFACE(pic_param->ReferenceFrames[i].picture_id); - assert(obj_surface); + if ((h264_profile != VAProfileH264Baseline)) { + if (pic_param->num_slice_groups_minus1 || + pic_param->pic_fields.bits.redundant_pic_cnt_present_flag) { + WARN_ONCE("Unsupported the FMO/ASO constraints!!!\n"); + goto error; + } + } - if (!obj_surface) - goto error; + /* Fill in the reference objects array with the actual VA surface + objects with 1:1 correspondence with any entry in ReferenceFrames[], + i.e. 
including "holes" for invalid entries, that are expanded + to NULL in the reference_objects[] array */ + for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) { + const VAPictureH264 * const va_pic = &pic_param->ReferenceFrames[i]; - if (!obj_surface->bo) { /* a reference frame without store buffer */ - WARN_ONCE("Invalid reference frame!!!\n"); - } + obj_surface = NULL; + if (!(va_pic->flags & VA_PICTURE_H264_INVALID) && + va_pic->picture_id != VA_INVALID_ID) { + obj_surface = SURFACE(pic_param->ReferenceFrames[i].picture_id); + if (!obj_surface) + return VA_STATUS_ERROR_INVALID_SURFACE; - decode_state->reference_objects[i] = obj_surface; + /* + * Sometimes a dummy frame comes from the upper layer + * library, call i965_check_alloc_surface_bo() to make + * sure the store buffer is allocated for this reference + * frame + */ + va_status = avc_ensure_surface_bo(ctx, decode_state, obj_surface, + pic_param); + if (va_status != VA_STATUS_SUCCESS) + return va_status; } + decode_state->reference_objects[i] = obj_surface; } - for ( ; i < 16; i++) - decode_state->reference_objects[i] = NULL; + for (j = 0; j < decode_state->num_slice_params; j++) { + assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; + + if (j == decode_state->num_slice_params - 1) + next_slice_group_param = NULL; + else + next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer; + + for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) { + + if (i < decode_state->slice_params[j]->num_elements - 1) + next_slice_param = slice_param + 1; + else + next_slice_param = next_slice_group_param; + + if (next_slice_param != NULL) { + /* If the mb position of next_slice is less than or equal to the current slice, + * discard the current frame. 
+ */ + if (next_slice_param->first_mb_in_slice <= slice_param->first_mb_in_slice) { + next_slice_param = NULL; + WARN_ONCE("!!!incorrect slice_param. The first_mb_in_slice of next_slice is less" + " than or equal to that in current slice\n"); + goto error; + } + } + } + } return VA_STATUS_SUCCESS; @@ -617,7 +971,12 @@ intel_decoder_check_vc1_parameter(VADriverContextP ctx, VAPictureParameterBufferVC1 *pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer; struct object_surface *obj_surface; int i = 0; - + + if (pic_param->sequence_fields.bits.interlace == 1 && + pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */ + return VA_STATUS_ERROR_DECODING_ERROR; + } + if (pic_param->picture_fields.bits.picture_type == 0 || pic_param->picture_fields.bits.picture_type == 3) { } else if (pic_param->picture_fields.bits.picture_type == 1 || @@ -654,6 +1013,48 @@ error: return VA_STATUS_ERROR_INVALID_PARAMETER; } +static VAStatus +intel_decoder_check_vp8_parameter(VADriverContextP ctx, + struct decode_state *decode_state) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer; + struct object_surface *obj_surface; + int i = 0; + + if (pic_param->last_ref_frame != VA_INVALID_SURFACE) { + obj_surface = SURFACE(pic_param->last_ref_frame); + + if (obj_surface && obj_surface->bo) + decode_state->reference_objects[i++] = obj_surface; + else + decode_state->reference_objects[i++] = NULL; + } + + if (pic_param->golden_ref_frame != VA_INVALID_SURFACE) { + obj_surface = SURFACE(pic_param->golden_ref_frame); + + if (obj_surface && obj_surface->bo) + decode_state->reference_objects[i++] = obj_surface; + else + decode_state->reference_objects[i++] = NULL; + } + + if (pic_param->alt_ref_frame != VA_INVALID_SURFACE) { + obj_surface = SURFACE(pic_param->alt_ref_frame); + + if (obj_surface && obj_surface->bo) + 
decode_state->reference_objects[i++] = obj_surface; + else + decode_state->reference_objects[i++] = NULL; + } + + for ( ; i < 16; i++) + decode_state->reference_objects[i] = NULL; + + return VA_STATUS_SUCCESS; +} + VAStatus intel_decoder_sanity_check_input(VADriverContextP ctx, VAProfile profile, @@ -679,10 +1080,12 @@ intel_decoder_sanity_check_input(VADriverContextP ctx, vaStatus = intel_decoder_check_mpeg2_parameter(ctx, decode_state); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: - vaStatus = intel_decoder_check_avc_parameter(ctx, decode_state); + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: + vaStatus = intel_decoder_check_avc_parameter(ctx, profile, decode_state); break; case VAProfileVC1Simple: @@ -695,6 +1098,10 @@ intel_decoder_sanity_check_input(VADriverContextP ctx, vaStatus = VA_STATUS_SUCCESS; break; + case VAProfileVP8Version0_3: + vaStatus = intel_decoder_check_vp8_parameter(ctx, decode_state); + break; + default: vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER; break; @@ -703,3 +1110,68 @@ intel_decoder_sanity_check_input(VADriverContextP ctx, out: return vaStatus; } + +/* + * Return the next slice parameter + * + * Input: + * slice_param: the current slice + * *group_idx & *element_idx the current slice position in slice groups + * Output: + * Return the next slice parameter + * *group_idx & *element_idx the next slice position in slice groups, + * if the next slice is NULL, *group_idx & *element_idx will be ignored + */ +VASliceParameterBufferMPEG2 * +intel_mpeg2_find_next_slice(struct decode_state *decode_state, + VAPictureParameterBufferMPEG2 *pic_param, + VASliceParameterBufferMPEG2 *slice_param, + int *group_idx, + int *element_idx) +{ + VASliceParameterBufferMPEG2 *next_slice_param; + unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16; + int j = *group_idx, i = *element_idx + 1; + + for (; j < decode_state->num_slice_params; 
j++) { + for (; i < decode_state->slice_params[j]->num_elements; i++) { + next_slice_param = ((VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer) + i; + + if ((next_slice_param->slice_vertical_position * width_in_mbs + next_slice_param->slice_horizontal_position) >= + (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position)) { + *group_idx = j; + *element_idx = i; + + return next_slice_param; + } + } + + i = 0; + } + + return NULL; +} + +/* Ensure the segmentation buffer is large enough for the supplied + number of MBs, or re-allocate it */ +bool +intel_ensure_vp8_segmentation_buffer(VADriverContextP ctx, GenBuffer *buf, + unsigned int mb_width, unsigned int mb_height) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + /* The segmentation map is a 64-byte aligned linear buffer, with + each cache line holding only 8 bits for 4 continuous MBs */ + const unsigned int buf_size = ((mb_width + 3) / 4) * 64 * mb_height; + + if (buf->valid) { + if (buf->bo && buf->bo->size >= buf_size) + return true; + drm_intel_bo_unreference(buf->bo); + buf->valid = false; + } + + buf->bo = drm_intel_bo_alloc(i965->intel.bufmgr, "segmentation map", + buf_size, 0x1000); + buf->valid = buf->bo != NULL; + return buf->valid; +} diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index 2a71f3e..3e6acdd 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -43,9 +43,23 @@ mpeg2_set_reference_surfaces( VAPictureParameterBufferMPEG2 *pic_param ); +VAStatus +avc_ensure_surface_bo( + VADriverContextP ctx, + struct decode_state *decode_state, + struct object_surface *obj_surface, + const VAPictureParameterBufferH264 *pic_param +); + void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix); +int +avc_get_picture_id(struct object_surface *obj_surface); + +VAPictureH264 * +avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count); + unsigned int 
avc_get_first_mb_bit_offset( dri_bo *slice_data_bo, @@ -75,20 +89,69 @@ gen6_send_avc_ref_idx_state( const GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] ); +void +gen6_mfd_avc_phantom_slice(VADriverContextP ctx, + VAPictureParameterBufferH264 *pic_param, + VASliceParameterBufferH264 *next_slice_param, + struct intel_batchbuffer *batch +); + VAStatus intel_decoder_sanity_check_input(VADriverContextP ctx, VAProfile profile, struct decode_state *decode_state); void -intel_update_avc_frame_store_index(VADriverContextP ctx, - struct decode_state *decode_state, - VAPictureParameterBufferH264 *pic_param, - GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]); +intel_update_avc_frame_store_index( + VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES], + GenFrameStoreContext *fs_ctx +); + +void +gen75_update_avc_frame_store_index( + VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +); + +bool +gen75_fill_avc_picid_list( + uint16_t pic_ids[16], + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +); + +bool +gen75_send_avc_picid_state( + struct intel_batchbuffer *batch, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +); void intel_update_vc1_frame_store_index(VADriverContextP ctx, struct decode_state *decode_state, VAPictureParameterBufferVC1 *pic_param, GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]); + +VASliceParameterBufferMPEG2 * +intel_mpeg2_find_next_slice(struct decode_state *decode_state, + VAPictureParameterBufferMPEG2 *pic_param, + VASliceParameterBufferMPEG2 *slice_param, + int *group_idx, + int *element_idx); + + +void +intel_update_vp8_frame_store_index(VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferVP8 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]); + +bool 
+intel_ensure_vp8_segmentation_buffer(VADriverContextP ctx, GenBuffer *buf, + unsigned int mb_width, unsigned int mb_height); + #endif /* I965_DECODER_UTILS_H */ diff --git a/src/i965_defines.h b/src/i965_defines.h index 8e4350a..6bf8e0d 100755 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -107,6 +107,11 @@ # define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 # define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 +#define GEN8_3DSTATE_RASTER CMD(3, 0, 0x50) +# define GEN8_3DSTATE_RASTER_CULL_BOTH (0 << 16) +# define GEN8_3DSTATE_RASTER_CULL_NONE (1 << 16) +# define GEN8_3DSTATE_RASTER_CULL_FRONT (2 << 16) +# define GEN8_3DSTATE_RASTER_CULL_BACK (3 << 16) #define GEN6_3DSTATE_WM CMD(3, 0, 0x14) /* DW2 */ @@ -174,6 +179,10 @@ #define GEN6_3DSTATE_CONSTANT_GS CMD(3, 0, 0x16) #define GEN6_3DSTATE_CONSTANT_PS CMD(3, 0, 0x17) +/* Gen8 WM_HZ_OP */ +#define GEN8_3DSTATE_WM_HZ_OP CMD(3, 0, 0x52) + + # define GEN6_3DSTATE_CONSTANT_BUFFER_3_ENABLE (1 << 15) # define GEN6_3DSTATE_CONSTANT_BUFFER_2_ENABLE (1 << 14) # define GEN6_3DSTATE_CONSTANT_BUFFER_1_ENABLE (1 << 13) @@ -189,9 +198,13 @@ # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) +#define GEN8_3DSTATE_MULTISAMPLE CMD(3, 0, 0x0d) +#define GEN8_3DSTATE_SAMPLE_PATTERN CMD(3, 1, 0x1C) + /* GEN7 */ #define GEN7_3DSTATE_CLEAR_PARAMS CMD(3, 0, 0x04) #define GEN7_3DSTATE_DEPTH_BUFFER CMD(3, 0, 0x05) +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER CMD(3, 0, 0x07) #define GEN7_3DSTATE_URB_VS CMD(3, 0, 0x30) #define GEN7_3DSTATE_URB_HS CMD(3, 0, 0x31) @@ -204,8 +217,14 @@ #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS CMD(3, 1, 0x12) #define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS CMD(3, 1, 0x16) + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS CMD(3, 1, 0x14) +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS CMD(3, 1, 0x13) +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS CMD(3, 1, 0x15) /* DW1 */ # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 +# define GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 
16 +# define GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT 0 #define GEN7_3DSTATE_CONSTANT_HS CMD(3, 0, 0x19) #define GEN7_3DSTATE_CONSTANT_DS CMD(3, 0, 0x1a) @@ -223,6 +242,11 @@ # define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) # define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 # define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH (1 << 29) +# define GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET (1 << 28) + +# define GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT 5 +#define GEN8_3DSTATE_SBE_SWIZ CMD(3, 0, 0x51) #define GEN7_3DSTATE_PS CMD(3, 0, 0x20) /* DW1: kernel pointer */ @@ -255,6 +279,37 @@ /* DW6: kernel 1 pointer */ /* DW7: kernel 2 pointer */ +# define GEN8_PS_MAX_THREADS_SHIFT 23 + +#define GEN8_3DSTATE_PSEXTRA CMD(3, 0, 0x4f) +/* DW1 */ +# define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31) +# define GEN8_PSX_PSCDEPTH_OFF (0 << 26) +# define GEN8_PSX_PSCDEPTH_ON (1 << 26) +# define GEN8_PSX_PSCDEPTH_ON_GE (2 << 26) +# define GEN8_PSX_PSCDEPTH_ON_LE (3 << 26) +# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8) + +#define GEN8_3DSTATE_PSBLEND CMD(3, 0, 0x4d) +/* DW1 */ +# define GEN8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) +# define GEN8_PS_BLEND_HAS_WRITEABLE_RT (1 << 30) +# define GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 29) +# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(28, 24) +# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 24 +# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(23, 19) +# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 19 +# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(18, 14) +# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT 14 +# define GEN8_PS_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(13, 9) +# define GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT 9 +# define GEN8_PS_BLEND_ALPHA_TEST_ENABLE (1 << 8) +# define GEN8_PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7) + + +#define GEN7_3DSTATE_STENCIL_BUFFER CMD(3, 0, 0x06) +#define GEN8_3DSTATE_WM_DEPTH_STENCIL CMD(3, 0, 0x4e) + #define 
GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL CMD(3, 0, 0x21) #define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC CMD(3, 0, 0x23) @@ -270,6 +325,8 @@ #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS CMD(3, 0, 0x2b) #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS CMD(3, 0, 0x2e) #define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS CMD(3, 0, 0x2f) +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS CMD(3, 0, 0x2c) +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS CMD(3, 0, 0x2d) #define MFX(pipeline, op, sub_opa, sub_opb) \ (3 << 29 | \ @@ -328,6 +385,11 @@ #define MFD_JPEG_BSD_OBJECT MFX(2, 7, 1, 8) +#define MFX_VP8_PIC_STATE MFX(2, 4, 0, 0) + +#define MFD_VP8_BSD_OBJECT MFX(2, 4, 1, 8) + + #define VEB(pipeline, op, sub_opa, sub_opb) \ (3 << 29 | \ (pipeline) << 27 | \ @@ -610,6 +672,8 @@ #define VE1_VFCOMPONENT_2_SHIFT 20 #define VE1_VFCOMPONENT_3_SHIFT 16 #define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 +#define GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN8 */ +#define GEN8_VE0_VALID (1 << 25) /* for GEN8 */ #define VB0_BUFFER_INDEX_SHIFT 27 #define GEN6_VB0_BUFFER_INDEX_SHIFT 26 @@ -619,6 +683,8 @@ #define GEN6_VB0_INSTANCEDATA (1 << 20) #define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14) #define VB0_BUFFER_PITCH_SHIFT 0 +#define GEN8_VB0_BUFFER_INDEX_SHIFT 26 +#define GEN8_VB0_MOCS_SHIFT 16 #define _3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) #define _3DPRIMITIVE_VERTEX_RANDOM (1 << 15) @@ -649,6 +715,8 @@ #define _3DPRIM_LINESTRIP_CONT_BF 0x14 #define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 +#define GEN8_3DSTATE_VF_TOPOLOGY CMD(3, 0, 0x4b) + #define I965_TILEWALK_XMAJOR 0 #define I965_TILEWALK_YMAJOR 1 @@ -705,6 +773,8 @@ #define MFX_FORMAT_VC1 1 #define MFX_FORMAT_AVC 2 #define MFX_FORMAT_JPEG 3 +#define MFX_FORMAT_SVC 4 +#define MFX_FORMAT_VP8 5 #define MFX_SHORT_MODE 0 #define MFX_LONG_MODE 1 @@ -752,9 +822,4 @@ #define SUBSAMPLE_YUV411 5 #define SUBSAMPLE_RGBX 6 -#define URB_SIZE(intel) (IS_GEN7(intel->device_id) ? 4096 : \ - IS_GEN6(intel->device_id) ? 
1024 : \ - IS_IRONLAKE(intel->device_id) ? 1024 : \ - IS_G4X(intel->device_id) ? 384 : 256) - #endif /* _I965_DEFINES_H_ */ diff --git a/src/i965_device_info.c b/src/i965_device_info.c new file mode 100644 index 0000000..ea835da --- /dev/null +++ b/src/i965_device_info.c @@ -0,0 +1,532 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <stdio.h> +#include <stdlib.h> +#include "i965_drv_video.h" + +#include <string.h> +#include <strings.h> +#include <errno.h> +#include <cpuid.h> + +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */ +#define EXTRA_H264_DEC_CHROMA_FORMATS \ + (VA_RT_FORMAT_YUV400) + +/* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */ +#define EXTRA_JPEG_DEC_CHROMA_FORMATS \ + (VA_RT_FORMAT_YUV400 | VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | \ + VA_RT_FORMAT_YUV444) + +/* Defines VA profile as a 32-bit unsigned integer mask */ +#define VA_PROFILE_MASK(PROFILE) \ + (1U << VAProfile##PROFILE) + +extern struct hw_context *i965_proc_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *); +extern bool genx_render_init(VADriverContextP); + +static struct hw_codec_info g4x_hw_codec_info = { + .dec_hw_context_init = g4x_dec_hw_context_init, + .enc_hw_context_init = NULL, + .proc_hw_context_init = NULL, + .render_init = genx_render_init, + .post_processing_context_init = NULL, + + .max_width = 2048, + .max_height = 2048, + .min_linear_wpitch = 16, + .min_linear_hpitch = 16, + + .has_mpeg2_decoding = 1, + .has_h264_decoding = 1, + + .num_filters = 0, +}; + +extern struct hw_context *ironlake_dec_hw_context_init(VADriverContextP, struct object_config *); +extern void i965_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *); + +static struct hw_codec_info ilk_hw_codec_info = { + .dec_hw_context_init = ironlake_dec_hw_context_init, + .enc_hw_context_init = NULL, + .proc_hw_context_init = i965_proc_context_init, + .render_init = genx_render_init, + .post_processing_context_init = i965_post_processing_context_init, + + .max_width = 2048, + .max_height = 2048, + .min_linear_wpitch = 16, + .min_linear_hpitch = 16, + + .has_mpeg2_decoding = 1, + .has_h264_decoding = 1, + .has_vpp = 1, + 
.has_accelerated_putimage = 1, + + .num_filters = 0, +}; + +extern struct hw_context *gen6_dec_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen6_enc_hw_context_init(VADriverContextP, struct object_config *); +static struct hw_codec_info snb_hw_codec_info = { + .dec_hw_context_init = gen6_dec_hw_context_init, + .enc_hw_context_init = gen6_enc_hw_context_init, + .proc_hw_context_init = i965_proc_context_init, + .render_init = genx_render_init, + .post_processing_context_init = i965_post_processing_context_init, + + .max_width = 2048, + .max_height = 2048, + .min_linear_wpitch = 16, + .min_linear_hpitch = 16, + + .h264_mvc_dec_profiles = VA_PROFILE_MASK(H264StereoHigh), + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, + + .has_mpeg2_decoding = 1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + .has_di_motion_adptive = 1, + + .num_filters = 2, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_NULL }, + { VAProcFilterDeinterlacing, I965_RING_NULL }, + }, +}; + +extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen7_enc_hw_context_init(VADriverContextP, struct object_config *); +static struct hw_codec_info ivb_hw_codec_info = { + .dec_hw_context_init = gen7_dec_hw_context_init, + .enc_hw_context_init = gen7_enc_hw_context_init, + .proc_hw_context_init = i965_proc_context_init, + .render_init = genx_render_init, + .post_processing_context_init = i965_post_processing_context_init, + + .max_width = 4096, + .max_height = 4096, + .min_linear_wpitch = 64, + .min_linear_hpitch = 16, + + .h264_mvc_dec_profiles = VA_PROFILE_MASK(H264StereoHigh), + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, + .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, + + .has_mpeg2_decoding = 1, + .has_mpeg2_encoding = 
1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_jpeg_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + .has_di_motion_adptive = 1, + + .num_filters = 2, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_NULL }, + { VAProcFilterDeinterlacing, I965_RING_NULL }, + }, +}; + +static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *codec_info); + +extern struct hw_context *gen75_dec_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen75_enc_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen75_proc_context_init(VADriverContextP, struct object_config *); +static struct hw_codec_info hsw_hw_codec_info = { + .dec_hw_context_init = gen75_dec_hw_context_init, + .enc_hw_context_init = gen75_enc_hw_context_init, + .proc_hw_context_init = gen75_proc_context_init, + .render_init = genx_render_init, + .post_processing_context_init = i965_post_processing_context_init, + .preinit_hw_codec = hsw_hw_codec_preinit, + + .max_width = 4096, + .max_height = 4096, + .min_linear_wpitch = 64, + .min_linear_hpitch = 16, + + .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | + VA_PROFILE_MASK(H264MultiviewHigh)), + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, + .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, + + .has_mpeg2_decoding = 1, + .has_mpeg2_encoding = 1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_jpeg_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + .has_di_motion_adptive = 1, + .has_di_motion_compensated = 1, + .has_h264_mvc_encoding = 1, + + .num_filters = 5, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_VEBOX }, + { VAProcFilterDeinterlacing, I965_RING_VEBOX }, + { VAProcFilterSharpening, I965_RING_NULL }, + { 
VAProcFilterColorBalance, I965_RING_VEBOX}, + { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, + }, +}; + +extern struct hw_context *gen8_dec_hw_context_init(VADriverContextP, struct object_config *); +extern struct hw_context *gen8_enc_hw_context_init(VADriverContextP, struct object_config *); +extern void gen8_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *); +static struct hw_codec_info bdw_hw_codec_info = { + .dec_hw_context_init = gen8_dec_hw_context_init, + .enc_hw_context_init = gen8_enc_hw_context_init, + .proc_hw_context_init = gen75_proc_context_init, + .render_init = gen8_render_init, + .post_processing_context_init = gen8_post_processing_context_init, + + .max_width = 4096, + .max_height = 4096, + .min_linear_wpitch = 64, + .min_linear_hpitch = 16, + + .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | + VA_PROFILE_MASK(H264MultiviewHigh)), + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, + .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, + + .has_mpeg2_decoding = 1, + .has_mpeg2_encoding = 1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_jpeg_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + .has_di_motion_adptive = 1, + .has_di_motion_compensated = 1, + .has_vp8_decoding = 1, + .has_h264_mvc_encoding = 1, + + .num_filters = 5, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_VEBOX }, + { VAProcFilterDeinterlacing, I965_RING_VEBOX }, + { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */ + { VAProcFilterColorBalance, I965_RING_VEBOX}, + { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, + }, +}; + +static struct hw_codec_info chv_hw_codec_info = { + .dec_hw_context_init = gen8_dec_hw_context_init, + .enc_hw_context_init = gen8_enc_hw_context_init, + .proc_hw_context_init = gen75_proc_context_init, + .render_init = gen8_render_init, + 
.post_processing_context_init = gen8_post_processing_context_init, + + .max_width = 4096, + .max_height = 4096, + .min_linear_wpitch = 64, + .min_linear_hpitch = 16, + + .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | + VA_PROFILE_MASK(H264MultiviewHigh)), + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, + .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, + + .has_mpeg2_decoding = 1, + .has_mpeg2_encoding = 1, + .has_h264_decoding = 1, + .has_h264_encoding = 1, + .has_vc1_decoding = 1, + .has_jpeg_decoding = 1, + .has_vpp = 1, + .has_accelerated_getimage = 1, + .has_accelerated_putimage = 1, + .has_tiled_surface = 1, + .has_di_motion_adptive = 1, + .has_di_motion_compensated = 1, + .has_vp8_decoding = 1, + .has_h264_mvc_encoding = 1, + + .num_filters = 5, + .filters = { + { VAProcFilterNoiseReduction, I965_RING_VEBOX }, + { VAProcFilterDeinterlacing, I965_RING_VEBOX }, + { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */ + { VAProcFilterColorBalance, I965_RING_VEBOX}, + { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX}, + }, +}; + +struct hw_codec_info * +i965_get_codec_info(int devid) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, dev, str) case id: return &family##_hw_codec_info; +#include "i965_pciids.h" + default: + return NULL; + } +} + +static const struct intel_device_info g4x_device_info = { + .gen = 4, + + .urb_size = 384, + .max_wm_threads = 50, /* 10 * 5 */ + + .is_g4x = 1, +}; + +static const struct intel_device_info ilk_device_info = { + .gen = 5, + + .urb_size = 1024, + .max_wm_threads = 72, /* 12 * 6 */ +}; + +static const struct intel_device_info snb_gt1_device_info = { + .gen = 6, + .gt = 1, + + .urb_size = 1024, + .max_wm_threads = 40, +}; + +static const struct intel_device_info snb_gt2_device_info = { + .gen = 6, + .gt = 2, + + .urb_size = 1024, + .max_wm_threads = 80, +}; + +static const struct intel_device_info ivb_gt1_device_info = { + .gen = 7, + .gt = 1, 
+ + .urb_size = 4096, + .max_wm_threads = 48, + + .is_ivybridge = 1, +}; + +static const struct intel_device_info ivb_gt2_device_info = { + .gen = 7, + .gt = 2, + + .urb_size = 4096, + .max_wm_threads = 172, + + .is_ivybridge = 1, +}; + +static const struct intel_device_info byt_device_info = { + .gen = 7, + .gt = 1, + + .urb_size = 4096, + .max_wm_threads = 48, + + .is_ivybridge = 1, + .is_baytrail = 1, +}; + +static const struct intel_device_info hsw_gt1_device_info = { + .gen = 7, + .gt = 1, + + .urb_size = 4096, + .max_wm_threads = 102, + + .is_haswell = 1, +}; + +static const struct intel_device_info hsw_gt2_device_info = { + .gen = 7, + .gt = 2, + + .urb_size = 4096, + .max_wm_threads = 204, + + .is_haswell = 1, +}; + +static const struct intel_device_info hsw_gt3_device_info = { + .gen = 7, + .gt = 3, + + .urb_size = 4096, + .max_wm_threads = 408, + + .is_haswell = 1, +}; + +static const struct intel_device_info bdw_device_info = { + .gen = 8, + + .urb_size = 4096, + .max_wm_threads = 64, /* per PSD */ +}; + +static const struct intel_device_info chv_device_info = { + .gen = 8, + + .urb_size = 4096, + .max_wm_threads = 64, /* per PSD */ + + .is_cherryview = 1, +}; + +const struct intel_device_info * +i965_get_device_info(int devid) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, dev, str) case id: return &dev##_device_info; +#include "i965_pciids.h" + default: + return NULL; + } +} + +static void cpuid(unsigned int op, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + __cpuid_count(op, 0, *eax, *ebx, *ecx, *edx); +} + +/* + * This function doesn't check the length. And the caller should + * assure that the length of input string should be greater than 48. 
+ */ +static int intel_driver_detect_cpustring(char *model_id) +{ + uint32_t *rdata; + + if (model_id == NULL) + return -EINVAL; + + rdata = (uint32_t *)model_id; + + /* obtain the max supported extended CPUID info */ + cpuid(0x80000000, &rdata[0], &rdata[1], &rdata[2], &rdata[3]); + + /* If the max extended CPUID info is less than 0x80000004, fail */ + if (rdata[0] < 0x80000004) + return -EINVAL; + + /* obtain the CPUID string */ + cpuid(0x80000002, &rdata[0], &rdata[1], &rdata[2], &rdata[3]); + cpuid(0x80000003, &rdata[4], &rdata[5], &rdata[6], &rdata[7]); + cpuid(0x80000004, &rdata[8], &rdata[9], &rdata[10], &rdata[11]); + + *(model_id + 48) = '\0'; + return 0; +} + +/* + * HSW CPU models for which hsw_hw_codec_preinit() clears the H.264, H.264 MVC and MPEG-2 encoding flags. + * The strings are captured from /proc/cpuinfo with the space characters stripped. + */ +const static char *hsw_cpu_hook_list[] = { +"Intel(R)Pentium(R)3556U", +"Intel(R)Pentium(R)3560Y", +"Intel(R)Pentium(R)3550M", +"Intel(R)Celeron(R)2980U", +"Intel(R)Celeron(R)2955U", +"Intel(R)Celeron(R)2950M", +}; + +static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *codec_info) +{ + char model_string[64]; + char *model_ptr, *tmp_ptr; + int i, model_len, list_len; + bool found; + + memset(model_string, 0, sizeof(model_string)); + + /* If it can't detect cpu model_string, leave it alone */ + if (intel_driver_detect_cpustring(model_string)) + return; + + /* strip the cpufreq info: cut the string at the first '@', if any */ + model_ptr = model_string; + tmp_ptr = strstr(model_ptr, "@"); + + if (tmp_ptr) + *tmp_ptr = '\0'; + + /* strip the space characters in place; case is left as is, the lookup below uses strncasecmp() */ + model_ptr = model_string; + model_len = strlen(model_string); + for (i = 0; i < model_len; i++) { + if (model_string[i] != ' ') { + *model_ptr = model_string[i]; + model_ptr++; + } + } + *model_ptr = '\0'; + + found = false; + list_len = sizeof(hsw_cpu_hook_list) / sizeof(char *); + model_len = strlen(model_string); + for (i = 0; i < list_len; i++) { + model_ptr = (char *)hsw_cpu_hook_list[i]; + + if (strlen(model_ptr) != 
model_len) + continue; + + if (strncasecmp(model_string, model_ptr, model_len) == 0) { + found = true; + break; + } + } + + if (found) { + codec_info->has_h264_encoding = 0; + codec_info->has_h264_mvc_encoding = 0; + codec_info->has_mpeg2_encoding = 0; + } + return; +} diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 992bfa5..aa521e5 100755..100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -28,6 +28,7 @@ */ #include "sysdeps.h" +#include <unistd.h> #ifdef HAVE_VA_X11 # include "i965_output_dri.h" @@ -37,6 +38,7 @@ # include "i965_output_wayland.h" #endif +#include "intel_version.h" #include "intel_driver.h" #include "intel_memman.h" #include "intel_batchbuffer.h" @@ -78,6 +80,22 @@ #define HAS_TILED_SURFACE(ctx) ((ctx)->codec_info->has_tiled_surface) +#define HAS_VP8_DECODING(ctx) ((ctx)->codec_info->has_vp8_decoding && \ + (ctx)->intel.has_bsd) + +#define HAS_VP8_ENCODING(ctx) ((ctx)->codec_info->has_vp8_encoding && \ + (ctx)->intel.has_bsd) + +#define HAS_H264_MVC_DECODING(ctx) \ + (HAS_H264_DECODING(ctx) && (ctx)->codec_info->h264_mvc_dec_profiles) + +#define HAS_H264_MVC_DECODING_PROFILE(ctx, profile) \ + (HAS_H264_MVC_DECODING(ctx) && \ + ((ctx)->codec_info->h264_mvc_dec_profiles & (1U << profile))) + +#define HAS_H264_MVC_ENCODING(ctx) ((ctx)->codec_info->has_h264_mvc_encoding && \ + (ctx)->intel.has_bsd) + static int get_sampling_from_fourcc(unsigned int fourcc); /* Check whether we are rendering to X11 (VA/X11 or VA/GLX API) */ @@ -88,6 +106,117 @@ static int get_sampling_from_fourcc(unsigned int fourcc); #define IS_VA_WAYLAND(ctx) \ (((ctx)->display_type & VA_DISPLAY_MAJOR_MASK) == VA_DISPLAY_WAYLAND) +#define I965_BIT 1 +#define I965_2BITS (I965_BIT << 1) +#define I965_4BITS (I965_BIT << 2) +#define I965_8BITS (I965_BIT << 3) +#define I965_16BITS (I965_BIT << 4) +#define I965_32BITS (I965_BIT << 5) + +#define PLANE_0 0 +#define PLANE_1 1 +#define PLANE_2 2 + +#define OFFSET_0 0 +#define OFFSET_4 4 +#define OFFSET_8 8 +#define 
OFFSET_16 16 +#define OFFSET_24 24 + +/* hfactor, vfactor, num_planes, bpp[], num_components, components[] */ +#define I_NV12 2, 2, 2, {I965_8BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_1, OFFSET_8} } +#define I_I420 2, 2, 3, {I965_8BITS, I965_2BITS, I965_2BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } +#define I_IYUV I_I420 +#define I_IMC3 I_I420 +#define I_YV12 2, 2, 3, {I965_8BITS, I965_2BITS, I965_2BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_2, OFFSET_0}, {PLANE_1, OFFSET_0} } +#define I_IMC1 I_YV12 + +#define I_422H 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } +#define I_422V 1, 2, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } +#define I_YV16 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_2, OFFSET_0}, {PLANE_1, OFFSET_0} } +#define I_YUY2 2, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_24} } +#define I_UYVY 2, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16} } + +#define I_444P 1, 1, 3, {I965_8BITS, I965_8BITS, I965_8BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } + +#define I_411P 4, 1, 3, {I965_8BITS, I965_2BITS, I965_2BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} } + +#define I_Y800 1, 1, 1, {I965_8BITS}, 1, { {PLANE_0, OFFSET_0} } + +#define I_RGBA 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_24} } +#define I_RGBX 1, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16} } +#define I_BGRA 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_24} } +#define I_BGRX 1, 1, 1, {I965_32BITS}, 3, { {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_8}, {PLANE_0, 
OFFSET_0} } + +#define I_ARGB 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_24}, {PLANE_0, OFFSET_0} } +#define I_ABGR 1, 1, 1, {I965_32BITS}, 4, { {PLANE_0, OFFSET_24}, {PLANE_0, OFFSET_16}, {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0} } + +#define I_IA88 1, 1, 1, {I965_16BITS}, 2, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_8} } +#define I_AI88 1, 1, 1, {I965_16BITS}, 2, { {PLANE_0, OFFSET_8}, {PLANE_0, OFFSET_0} } + +#define I_IA44 1, 1, 1, {I965_8BITS}, 2, { {PLANE_0, OFFSET_0}, {PLANE_0, OFFSET_4} } +#define I_AI44 1, 1, 1, {I965_8BITS}, 2, { {PLANE_0, OFFSET_4}, {PLANE_0, OFFSET_0} } + +/* flag */ +#define I_S 1 +#define I_I 2 +#define I_SI (I_S | I_I) + +#define DEF_FOUCC_INFO(FOURCC, FORMAT, SUB, FLAG) { VA_FOURCC_##FOURCC, I965_COLOR_##FORMAT, SUBSAMPLE_##SUB, FLAG, I_##FOURCC } +#define DEF_YUV(FOURCC, SUB, FLAG) DEF_FOUCC_INFO(FOURCC, YUV, SUB, FLAG) +#define DEF_RGB(FOURCC, SUB, FLAG) DEF_FOUCC_INFO(FOURCC, RGB, SUB, FLAG) +#define DEF_INDEX(FOURCC, SUB, FLAG) DEF_FOUCC_INFO(FOURCC, INDEX, SUB, FLAG) + +static const i965_fourcc_info i965_fourcc_infos[] = { + DEF_YUV(NV12, YUV420, I_SI), + DEF_YUV(I420, YUV420, I_SI), + DEF_YUV(IYUV, YUV420, I_S), + DEF_YUV(IMC3, YUV420, I_S), + DEF_YUV(YV12, YUV420, I_SI), + DEF_YUV(IMC1, YUV420, I_S), + + DEF_YUV(422H, YUV422H, I_SI), + DEF_YUV(422V, YUV422V, I_S), + DEF_YUV(YV16, YUV422H, I_S), + DEF_YUV(YUY2, YUV422H, I_SI), + DEF_YUV(UYVY, YUV422H, I_SI), + + DEF_YUV(444P, YUV444, I_S), + + DEF_YUV(411P, YUV411, I_S), + + DEF_YUV(Y800, YUV400, I_S), + + DEF_RGB(RGBA, RGBX, I_SI), + DEF_RGB(RGBX, RGBX, I_SI), + DEF_RGB(BGRA, RGBX, I_SI), + DEF_RGB(BGRX, RGBX, I_SI), + + DEF_RGB(ARGB, RGBX, I_I), + DEF_RGB(ABGR, RGBX, I_I), + + DEF_INDEX(IA88, RGBX, I_I), + DEF_INDEX(AI88, RGBX, I_I), + + DEF_INDEX(IA44, RGBX, I_I), + DEF_INDEX(AI44, RGBX, I_I) +}; +/* Return the i965_fourcc_infos entry whose fourcc field equals the argument, or NULL when no entry matches; the scan is bounded by the table size so an unknown fourcc cannot read past the end. */ +const i965_fourcc_info * +get_fourcc_info(unsigned int fourcc) +{ + unsigned int i; + + for (i = 0; i < ARRAY_ELEMS(i965_fourcc_infos); 
i++) { + const i965_fourcc_info * const info = &i965_fourcc_infos[i]; + + if (info->fourcc == fourcc) + return info; + } + + return NULL; +} + enum { I965_SURFACETYPE_RGBA = 1, I965_SURFACETYPE_YUV, @@ -97,6 +226,30 @@ enum { /* List of supported display attributes */ static const VADisplayAttribute i965_display_attributes[] = { { + VADisplayAttribBrightness, + -100, 100, DEFAULT_BRIGHTNESS, + VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE + }, + + { + VADisplayAttribContrast, + 0, 100, DEFAULT_CONTRAST, + VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE + }, + + { + VADisplayAttribHue, + -180, 180, DEFAULT_HUE, + VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE + }, + + { + VADisplayAttribSaturation, + 0, 100, DEFAULT_SATURATION, + VA_DISPLAY_ATTRIB_GETTABLE | VA_DISPLAY_ATTRIB_SETTABLE + }, + + { VADisplayAttribRotation, 0, 3, VA_ROTATION_NONE, VA_DISPLAY_ATTRIB_GETTABLE|VA_DISPLAY_ATTRIB_SETTABLE @@ -112,19 +265,21 @@ typedef struct { static const i965_image_format_map_t i965_image_formats_map[I965_MAX_IMAGE_FORMATS + 1] = { { I965_SURFACETYPE_YUV, - { VA_FOURCC('Y','V','1','2'), VA_LSB_FIRST, 12, } }, + { VA_FOURCC_YV12, VA_LSB_FIRST, 12, } }, { I965_SURFACETYPE_YUV, - { VA_FOURCC('I','4','2','0'), VA_LSB_FIRST, 12, } }, + { VA_FOURCC_I420, VA_LSB_FIRST, 12, } }, { I965_SURFACETYPE_YUV, - { VA_FOURCC('N','V','1','2'), VA_LSB_FIRST, 12, } }, + { VA_FOURCC_NV12, VA_LSB_FIRST, 12, } }, { I965_SURFACETYPE_YUV, - { VA_FOURCC('Y','U','Y','2'), VA_LSB_FIRST, 16, } }, + { VA_FOURCC_YUY2, VA_LSB_FIRST, 16, } }, { I965_SURFACETYPE_YUV, - { VA_FOURCC('U','Y','V','Y'), VA_LSB_FIRST, 16, } }, + { VA_FOURCC_UYVY, VA_LSB_FIRST, 16, } }, + { I965_SURFACETYPE_YUV, + { VA_FOURCC_422H, VA_LSB_FIRST, 16, } }, { I965_SURFACETYPE_RGBA, - { VA_FOURCC('R','G','B','X'), VA_LSB_FIRST, 32, 24, 0x000000ff, 0x0000ff00, 0x00ff0000 } }, + { VA_FOURCC_RGBX, VA_LSB_FIRST, 32, 24, 0x000000ff, 0x0000ff00, 0x00ff0000 } }, { I965_SURFACETYPE_RGBA, - { 
VA_FOURCC('B','G','R','X'), VA_LSB_FIRST, 32, 24, 0x00ff0000, 0x0000ff00, 0x000000ff } }, + { VA_FOURCC_BGRX, VA_LSB_FIRST, 32, 24, 0x00ff0000, 0x0000ff00, 0x000000ff } }, }; /* List of supported subpicture formats */ @@ -142,23 +297,23 @@ typedef struct { static const i965_subpic_format_map_t i965_subpic_formats_map[I965_MAX_SUBPIC_FORMATS + 1] = { { I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_P4A4_UNORM, - { VA_FOURCC('I','A','4','4'), VA_MSB_FIRST, 8, }, + { VA_FOURCC_IA44, VA_MSB_FIRST, 8, }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_A4P4_UNORM, - { VA_FOURCC('A','I','4','4'), VA_MSB_FIRST, 8, }, + { VA_FOURCC_AI44, VA_MSB_FIRST, 8, }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_P8A8_UNORM, - { VA_FOURCC('I','A','8','8'), VA_MSB_FIRST, 16, }, + { VA_FOURCC_IA88, VA_MSB_FIRST, 16, }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_INDEXED, I965_SURFACEFORMAT_A8P8_UNORM, - { VA_FOURCC('A','I','8','8'), VA_MSB_FIRST, 16, }, + { VA_FOURCC_AI88, VA_MSB_FIRST, 16, }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_RGBA, I965_SURFACEFORMAT_B8G8R8A8_UNORM, - { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32, + { VA_FOURCC_BGRA, VA_LSB_FIRST, 32, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }, COMMON_SUBPICTURE_FLAGS }, { I965_SURFACETYPE_RGBA, I965_SURFACEFORMAT_R8G8B8A8_UNORM, - { VA_FOURCC('R','G','B','A'), VA_LSB_FIRST, 32, + { VA_FOURCC_RGBA, VA_LSB_FIRST, 32, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 }, COMMON_SUBPICTURE_FLAGS }, }; @@ -181,117 +336,36 @@ get_subpic_format(const VAImageFormat *va_format) return NULL; } -extern struct hw_context *i965_proc_context_init(VADriverContextP, struct object_config *); -extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info g4x_hw_codec_info = { - .dec_hw_context_init = g4x_dec_hw_context_init, - .enc_hw_context_init = NULL, - .proc_hw_context_init = NULL, - .max_width = 2048, - 
.max_height = 2048, - - .has_mpeg2_decoding = 1, - .has_h264_decoding = 1, - - .num_filters = 0, -}; - -extern struct hw_context *ironlake_dec_hw_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info ironlake_hw_codec_info = { - .dec_hw_context_init = ironlake_dec_hw_context_init, - .enc_hw_context_init = NULL, - .proc_hw_context_init = i965_proc_context_init, - .max_width = 2048, - .max_height = 2048, +/* Checks whether the surface is in busy state */ +static bool +is_surface_busy(struct i965_driver_data *i965, + struct object_surface *obj_surface) +{ + assert(obj_surface != NULL); - .has_mpeg2_decoding = 1, - .has_h264_decoding = 1, - .has_vpp = 1, - .has_accelerated_putimage = 1, + if (obj_surface->locked_image_id != VA_INVALID_ID) + return true; + if (obj_surface->derived_image_id != VA_INVALID_ID) + return true; + return false; +} - .num_filters = 0, -}; +/* Checks whether the image is in busy state */ +static bool +is_image_busy(struct i965_driver_data *i965, struct object_image *obj_image) +{ + struct object_buffer *obj_buffer; -extern struct hw_context *gen6_dec_hw_context_init(VADriverContextP, struct object_config *); -extern struct hw_context *gen6_enc_hw_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info gen6_hw_codec_info = { - .dec_hw_context_init = gen6_dec_hw_context_init, - .enc_hw_context_init = gen6_enc_hw_context_init, - .proc_hw_context_init = i965_proc_context_init, - .max_width = 2048, - .max_height = 2048, - - .has_mpeg2_decoding = 1, - .has_mpeg2_encoding = 1, - .has_h264_decoding = 1, - .has_h264_encoding = 1, - .has_vc1_decoding = 1, - .has_vpp = 1, - .has_accelerated_getimage = 1, - .has_accelerated_putimage = 1, - .has_tiled_surface = 1, - - .num_filters = 2, - .filters = { - VAProcFilterNoiseReduction, - VAProcFilterDeinterlacing, - }, -}; + assert(obj_image != NULL); -extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, struct object_config *); -extern 
struct hw_context *gen7_enc_hw_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info gen7_hw_codec_info = { - .dec_hw_context_init = gen7_dec_hw_context_init, - .enc_hw_context_init = gen7_enc_hw_context_init, - .proc_hw_context_init = i965_proc_context_init, - .max_width = 4096, - .max_height = 4096, - - .has_mpeg2_decoding = 1, - .has_mpeg2_encoding = 1, - .has_h264_decoding = 1, - .has_h264_encoding = 1, - .has_vc1_decoding = 1, - .has_jpeg_decoding = 1, - .has_vpp = 1, - .has_accelerated_getimage = 1, - .has_accelerated_putimage = 1, - .has_tiled_surface = 1, - - .num_filters = 2, - .filters = { - VAProcFilterNoiseReduction, - VAProcFilterDeinterlacing, - }, -}; + if (obj_image->derived_surface != VA_INVALID_ID) + return true; -extern struct hw_context *gen75_proc_context_init(VADriverContextP, struct object_config *); -static struct hw_codec_info gen75_hw_codec_info = { - .dec_hw_context_init = gen75_dec_hw_context_init, - .enc_hw_context_init = gen75_enc_hw_context_init, - .proc_hw_context_init = gen75_proc_context_init, - .max_width = 4096, - .max_height = 4096, - - .has_mpeg2_decoding = 1, - .has_mpeg2_encoding = 1, - .has_h264_decoding = 1, - .has_h264_encoding = 1, - .has_vc1_decoding = 1, - .has_jpeg_decoding = 1, - .has_vpp = 1, - .has_accelerated_getimage = 1, - .has_accelerated_putimage = 1, - .has_tiled_surface = 1, - .has_di_motion_adptive = 1, - .num_filters = 4, - .filters = { - VAProcFilterNoiseReduction, - VAProcFilterDeinterlacing, - VAProcFilterSharpening, - VAProcFilterColorBalance, - }, -}; + obj_buffer = BUFFER(obj_image->image.buf); + if (obj_buffer && obj_buffer->export_refcount > 0) + return true; + return false; +} #define I965_PACKED_HEADER_BASE 0 #define I965_PACKED_MISC_HEADER_BASE 3 @@ -304,7 +378,7 @@ va_enc_packed_type_to_idx(int packed_type) if (packed_type & VAEncPackedHeaderMiscMask) { idx = I965_PACKED_MISC_HEADER_BASE; packed_type = (~VAEncPackedHeaderMiscMask & packed_type); - 
assert(packed_type > 0); + ASSERT_RET(packed_type > 0, 0); idx += (packed_type - 1); } else { idx = I965_PACKED_HEADER_BASE; @@ -324,16 +398,15 @@ va_enc_packed_type_to_idx(int packed_type) default: /* Should not get here */ - assert(0); + ASSERT_RET(0, 0); break; } } - assert(idx < 4); + ASSERT_RET(idx < 4, 0); return idx; } - VAStatus i965_QueryConfigProfiles(VADriverContextP ctx, VAProfile *profile_list, /* out */ @@ -350,10 +423,14 @@ i965_QueryConfigProfiles(VADriverContextP ctx, if (HAS_H264_DECODING(i965) || HAS_H264_ENCODING(i965)) { - profile_list[i++] = VAProfileH264Baseline; + profile_list[i++] = VAProfileH264ConstrainedBaseline; profile_list[i++] = VAProfileH264Main; profile_list[i++] = VAProfileH264High; } + if (HAS_H264_MVC_DECODING_PROFILE(i965, VAProfileH264MultiviewHigh)) + profile_list[i++] = VAProfileH264MultiviewHigh; + if (HAS_H264_MVC_DECODING_PROFILE(i965, VAProfileH264StereoHigh)) + profile_list[i++] = VAProfileH264StereoHigh; if (HAS_VC1_DECODING(i965)) { profile_list[i++] = VAProfileVC1Simple; @@ -369,8 +446,18 @@ i965_QueryConfigProfiles(VADriverContextP ctx, profile_list[i++] = VAProfileJPEGBaseline; } + if (HAS_VP8_DECODING(i965) || + HAS_VP8_ENCODING(i965)) { + profile_list[i++] = VAProfileVP8Version0_3; + } + + if (HAS_H264_MVC_ENCODING(i965)) { + profile_list[i++] = VAProfileH264MultiviewHigh; + profile_list[i++] = VAProfileH264StereoHigh; + } + /* If the assert fails then I965_MAX_PROFILES needs to be bigger */ - assert(i <= I965_MAX_PROFILES); + ASSERT_RET(i <= I965_MAX_PROFILES, VA_STATUS_ERROR_OPERATION_FAILED); *num_profiles = i; return VA_STATUS_SUCCESS; @@ -396,16 +483,24 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: if (HAS_H264_DECODING(i965)) entrypoint_list[n++] = VAEntrypointVLD; - + if (HAS_H264_ENCODING(i965)) entrypoint_list[n++] = VAEntrypointEncSlice; break; + case 
VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + if (HAS_H264_MVC_DECODING_PROFILE(i965, profile)) + entrypoint_list[n++] = VAEntrypointVLD; + + if (HAS_H264_MVC_ENCODING(i965)) + entrypoint_list[n++] = VAEntrypointEncSlice; + break; case VAProfileVC1Simple: case VAProfileVC1Main: @@ -424,16 +519,139 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, entrypoint_list[n++] = VAEntrypointVLD; break; + case VAProfileVP8Version0_3: + if (HAS_VP8_DECODING(i965)) + entrypoint_list[n++] = VAEntrypointVLD; + + if (HAS_VP8_ENCODING(i965)) + entrypoint_list[n++] = VAEntrypointEncSlice; + default: break; } /* If the assert fails then I965_MAX_ENTRYPOINTS needs to be bigger */ - assert(n <= I965_MAX_ENTRYPOINTS); + ASSERT_RET(n <= I965_MAX_ENTRYPOINTS, VA_STATUS_ERROR_OPERATION_FAILED); *num_entrypoints = n; return n > 0 ? VA_STATUS_SUCCESS : VA_STATUS_ERROR_UNSUPPORTED_PROFILE; } +static VAStatus +i965_validate_config(VADriverContextP ctx, VAProfile profile, + VAEntrypoint entrypoint) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + VAStatus va_status; + + /* Validate profile & entrypoint */ + switch (profile) { + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + if ((HAS_MPEG2_DECODING(i965) && entrypoint == VAEntrypointVLD) || + (HAS_MPEG2_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + if ((HAS_H264_DECODING(i965) && entrypoint == VAEntrypointVLD) || + (HAS_H264_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileVC1Simple: + case VAProfileVC1Main: + case VAProfileVC1Advanced: + if (HAS_VC1_DECODING(i965) && entrypoint == VAEntrypointVLD) { + va_status = VA_STATUS_SUCCESS; + } else { + 
va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileNone: + if (HAS_VPP(i965) && VAEntrypointVideoProc == entrypoint) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileJPEGBaseline: + if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileVP8Version0_3: + if ((HAS_VP8_DECODING(i965) && entrypoint == VAEntrypointVLD) || + (HAS_VP8_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + break; + + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + if ((HAS_H264_MVC_DECODING_PROFILE(i965, profile) && + entrypoint == VAEntrypointVLD) || + (HAS_H264_MVC_ENCODING(i965) && entrypoint == VAEntrypointEncSlice)) { + va_status = VA_STATUS_SUCCESS; + } else { + va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; + } + + break; + + default: + va_status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + break; + } + return va_status; +} + +static uint32_t +i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile, + VAEntrypoint entrypoint) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + uint32_t chroma_formats = VA_RT_FORMAT_YUV420; + + switch (profile) { + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + if (HAS_H264_DECODING(i965) && entrypoint == VAEntrypointVLD) + chroma_formats |= i965->codec_info->h264_dec_chroma_formats; + break; + + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + if (HAS_H264_MVC_DECODING(i965) && entrypoint == VAEntrypointVLD) + chroma_formats |= i965->codec_info->h264_dec_chroma_formats; + break; + + case VAProfileJPEGBaseline: + if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) + 
chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats; + break; + + default: + break; + } + return chroma_formats; +} + VAStatus i965_GetConfigAttributes(VADriverContextP ctx, VAProfile profile, @@ -441,25 +659,44 @@ i965_GetConfigAttributes(VADriverContextP ctx, VAConfigAttrib *attrib_list, /* in/out */ int num_attribs) { + VAStatus va_status; + struct i965_driver_data *i965 = i965_driver_data(ctx); int i; + va_status = i965_validate_config(ctx, profile, entrypoint); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + /* Other attributes don't seem to be defined */ /* What to do if we don't know the attribute? */ for (i = 0; i < num_attribs; i++) { switch (attrib_list[i].type) { case VAConfigAttribRTFormat: - attrib_list[i].value = VA_RT_FORMAT_YUV420; + attrib_list[i].value = i965_get_default_chroma_formats(ctx, + profile, entrypoint); break; case VAConfigAttribRateControl: if (entrypoint == VAEntrypointEncSlice) { - attrib_list[i].value = VA_RC_CBR | VA_RC_CQP; + attrib_list[i].value = VA_RC_CQP; + + if (profile != VAProfileMPEG2Main && + profile != VAProfileMPEG2Simple) + attrib_list[i].value |= VA_RC_CBR; break; } case VAConfigAttribEncPackedHeaders: if (entrypoint == VAEntrypointEncSlice) { attrib_list[i].value = VA_ENC_PACKED_HEADER_SEQUENCE | VA_ENC_PACKED_HEADER_PICTURE | VA_ENC_PACKED_HEADER_MISC; + if (profile == VAProfileH264ConstrainedBaseline || + profile == VAProfileH264Main || + profile == VAProfileH264High || + profile == VAProfileH264StereoHigh || + profile == VAProfileH264MultiviewHigh) { + attrib_list[i].value |= (VA_ENC_PACKED_HEADER_RAW_DATA | + VA_ENC_PACKED_HEADER_SLICE); + } break; } @@ -469,6 +706,16 @@ i965_GetConfigAttributes(VADriverContextP ctx, break; } + case VAConfigAttribEncQualityRange: + if (entrypoint == VAEntrypointEncSlice) { + attrib_list[i].value = 1; + if (profile == VAProfileH264ConstrainedBaseline || + profile == VAProfileH264Main || + profile == VAProfileH264High ) + attrib_list[i].value = 
ENCODER_QUALITY_RANGE; + break; + } + default: /* Do nothing */ attrib_list[i].value = VA_ATTRIB_NOT_SUPPORTED; @@ -485,29 +732,49 @@ i965_destroy_config(struct object_heap *heap, struct object_base *obj) object_heap_free(heap, obj); } -static VAStatus -i965_update_attribute(struct object_config *obj_config, VAConfigAttrib *attrib) +static VAConfigAttrib * +i965_lookup_config_attribute(struct object_config *obj_config, + VAConfigAttribType type) { int i; - /* Check existing attrbiutes */ for (i = 0; i < obj_config->num_attribs; i++) { - if (obj_config->attrib_list[i].type == attrib->type) { - /* Update existing attribute */ - obj_config->attrib_list[i].value = attrib->value; - return VA_STATUS_SUCCESS; - } + VAConfigAttrib * const attrib = &obj_config->attrib_list[i]; + if (attrib->type == type) + return attrib; } + return NULL; +} + +static VAStatus +i965_append_config_attribute(struct object_config *obj_config, + const VAConfigAttrib *new_attrib) +{ + VAConfigAttrib *attrib; + + if (obj_config->num_attribs >= I965_MAX_CONFIG_ATTRIBUTES) + return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + + attrib = &obj_config->attrib_list[obj_config->num_attribs++]; + attrib->type = new_attrib->type; + attrib->value = new_attrib->value; + return VA_STATUS_SUCCESS; +} + +static VAStatus +i965_ensure_config_attribute(struct object_config *obj_config, + const VAConfigAttrib *new_attrib) +{ + VAConfigAttrib *attrib; - if (obj_config->num_attribs < I965_MAX_CONFIG_ATTRIBUTES) { - i = obj_config->num_attribs; - obj_config->attrib_list[i].type = attrib->type; - obj_config->attrib_list[i].value = attrib->value; - obj_config->num_attribs++; + /* Check for existing attributes */ + attrib = i965_lookup_config_attribute(obj_config, new_attrib->type); + if (attrib) { + /* Update existing attribute */ + attrib->value = new_attrib->value; return VA_STATUS_SUCCESS; } - - return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + return i965_append_config_attribute(obj_config, new_attrib); } VAStatus @@ -524,63 +791,7 
@@ i965_CreateConfig(VADriverContextP ctx, int i; VAStatus vaStatus; - /* Validate profile & entrypoint */ - switch (profile) { - case VAProfileMPEG2Simple: - case VAProfileMPEG2Main: - if ((HAS_MPEG2_DECODING(i965) && VAEntrypointVLD == entrypoint) || - (HAS_MPEG2_ENCODING(i965) && VAEntrypointEncSlice == entrypoint)) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - break; - - case VAProfileH264Baseline: - case VAProfileH264Main: - case VAProfileH264High: - if ((HAS_H264_DECODING(i965) && VAEntrypointVLD == entrypoint) || - (HAS_H264_ENCODING(i965) && VAEntrypointEncSlice == entrypoint)) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - - break; - - case VAProfileVC1Simple: - case VAProfileVC1Main: - case VAProfileVC1Advanced: - if (HAS_VC1_DECODING(i965) && VAEntrypointVLD == entrypoint) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - - break; - - case VAProfileNone: - if (HAS_VPP(i965) && VAEntrypointVideoProc == entrypoint) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - - break; - - case VAProfileJPEGBaseline: - if (HAS_JPEG_DECODING(i965) && VAEntrypointVLD == entrypoint) { - vaStatus = VA_STATUS_SUCCESS; - } else { - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; - } - - break; - - default: - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; - break; - } + vaStatus = i965_validate_config(ctx, profile, entrypoint); if (VA_STATUS_SUCCESS != vaStatus) { return vaStatus; @@ -596,16 +807,23 @@ i965_CreateConfig(VADriverContextP ctx, obj_config->profile = profile; obj_config->entrypoint = entrypoint; - obj_config->attrib_list[0].type = VAConfigAttribRTFormat; - obj_config->attrib_list[0].value = VA_RT_FORMAT_YUV420; - obj_config->num_attribs = 1; - - for(i = 0; i < num_attribs; i++) { - vaStatus = i965_update_attribute(obj_config, 
&(attrib_list[i])); + obj_config->num_attribs = 0; - if (VA_STATUS_SUCCESS != vaStatus) { + for (i = 0; i < num_attribs; i++) { + vaStatus = i965_ensure_config_attribute(obj_config, &attrib_list[i]); + if (vaStatus != VA_STATUS_SUCCESS) break; - } + } + + if (vaStatus == VA_STATUS_SUCCESS) { + VAConfigAttrib attrib, *attrib_found; + attrib.type = VAConfigAttribRTFormat; + attrib.value = i965_get_default_chroma_formats(ctx, profile, entrypoint); + attrib_found = i965_lookup_config_attribute(obj_config, attrib.type); + if (!attrib_found || !attrib_found->value) + vaStatus = i965_append_config_attribute(obj_config, &attrib); + else if (!(attrib_found->value & attrib.value)) + vaStatus = VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; } /* Error recovery */ @@ -646,7 +864,7 @@ VAStatus i965_QueryConfigAttributes(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_SUCCESS; int i; - assert(obj_config); + ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG); *profile = obj_config->profile; *entrypoint = obj_config->entrypoint; *num_attribs = obj_config->num_attribs; @@ -658,10 +876,11 @@ VAStatus i965_QueryConfigAttributes(VADriverContextP ctx, return vaStatus; } -static void -i965_destroy_surface(struct object_heap *heap, struct object_base *obj) +void +i965_destroy_surface_storage(struct object_surface *obj_surface) { - struct object_surface *obj_surface = (struct object_surface *)obj; + if (!obj_surface) + return; dri_bo_unreference(obj_surface->bo); obj_surface->bo = NULL; @@ -670,7 +889,14 @@ i965_destroy_surface(struct object_heap *heap, struct object_base *obj) obj_surface->free_private_data(&obj_surface->private_data); obj_surface->private_data = NULL; } +} + +static void +i965_destroy_surface(struct object_heap *heap, struct object_base *obj) +{ + struct object_surface *obj_surface = (struct object_surface *)obj; + i965_destroy_surface_storage(obj_surface); object_heap_free(heap, obj); } @@ -687,9 +913,10 @@ i965_surface_native_memory(VADriverContextP ctx, return 
VA_STATUS_SUCCESS; // todo, should we disable tiling for 422 format? - if (expected_fourcc == VA_FOURCC('I', '4', '2', '0') || - expected_fourcc == VA_FOURCC('I', 'Y', 'U', 'V') || - expected_fourcc == VA_FOURCC('Y', 'V', '1', '2')) + if (expected_fourcc == VA_FOURCC_I420 || + expected_fourcc == VA_FOURCC_IYUV || + expected_fourcc == VA_FOURCC_YV12 || + expected_fourcc == VA_FOURCC_YV16) tiling = 0; i965_check_alloc_surface_bo(ctx, obj_surface, tiling, expected_fourcc, get_sampling_from_fourcc(expected_fourcc)); @@ -711,9 +938,9 @@ i965_suface_external_memory(VADriverContextP ctx, index > memory_attibute->num_buffers) return VA_STATUS_ERROR_INVALID_PARAMETER; - assert(obj_surface->orig_width == memory_attibute->width); - assert(obj_surface->orig_height == memory_attibute->height); - assert(memory_attibute->num_planes >= 1); + ASSERT_RET(obj_surface->orig_width == memory_attibute->width, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(obj_surface->orig_height == memory_attibute->height, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->num_planes >= 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->fourcc = memory_attibute->pixel_format; obj_surface->width = memory_attibute->pitches[0]; @@ -728,9 +955,9 @@ i965_suface_external_memory(VADriverContextP ctx, obj_surface->x_cr_offset = 0; switch (obj_surface->fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): - assert(memory_attibute->num_planes == 2); - assert(memory_attibute->pitches[0] == memory_attibute->pitches[1]); + case VA_FOURCC_NV12: + ASSERT_RET(memory_attibute->num_planes == 2, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[0] == memory_attibute->pitches[1], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV420; obj_surface->y_cb_offset = obj_surface->height; @@ -741,10 +968,10 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', 'M', 'C', '1'): - 
assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + case VA_FOURCC_YV12: + case VA_FOURCC_IMC1: + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV420; obj_surface->y_cr_offset = obj_surface->height; @@ -755,11 +982,11 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('I', 'Y', 'U', 'V'): - case VA_FOURCC('I', 'M', 'C', '3'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + case VA_FOURCC_I420: + case VA_FOURCC_IYUV: + case VA_FOURCC_IMC3: + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV420; obj_surface->y_cb_offset = obj_surface->height; @@ -770,9 +997,9 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): - assert(memory_attibute->num_planes == 1); + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: + ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV422H; obj_surface->y_cb_offset = 0; @@ -783,11 +1010,11 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): - assert(memory_attibute->num_planes == 1); + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_RGBX; 
obj_surface->y_cb_offset = 0; @@ -798,8 +1025,8 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('Y', '8', '0', '0'): /* monochrome surface */ - assert(memory_attibute->num_planes == 1); + case VA_FOURCC_Y800: /* monochrome surface */ + ASSERT_RET(memory_attibute->num_planes == 1, VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV400; obj_surface->y_cb_offset = 0; @@ -810,9 +1037,9 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('4', '1', '1', 'P'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + case VA_FOURCC_411P: + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV411; obj_surface->y_cb_offset = 0; @@ -823,9 +1050,9 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('4', '2', '2', 'H'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + case VA_FOURCC_422H: + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV422H; obj_surface->y_cb_offset = obj_surface->height; @@ -836,11 +1063,24 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('4', '2', '2', 'V'): + case VA_FOURCC_YV16: assert(memory_attibute->num_planes == 3); assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); obj_surface->subsampling = SUBSAMPLE_YUV422H; + obj_surface->y_cr_offset = memory_attibute->offsets[1] / obj_surface->width; + obj_surface->y_cb_offset = memory_attibute->offsets[2] / obj_surface->width; + obj_surface->cb_cr_width = obj_surface->orig_width / 2; + 
obj_surface->cb_cr_height = obj_surface->orig_height; + obj_surface->cb_cr_pitch = memory_attibute->pitches[1]; + + break; + + case VA_FOURCC_422V: + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); + + obj_surface->subsampling = SUBSAMPLE_YUV422H; obj_surface->y_cb_offset = obj_surface->height; obj_surface->y_cr_offset = memory_attibute->offsets[2] / obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width; @@ -849,9 +1089,9 @@ i965_suface_external_memory(VADriverContextP ctx, break; - case VA_FOURCC('4', '4', '4', 'P'): - assert(memory_attibute->num_planes == 3); - assert(memory_attibute->pitches[1] == memory_attibute->pitches[2]); + case VA_FOURCC_444P: + ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER); obj_surface->subsampling = SUBSAMPLE_YUV444; obj_surface->y_cb_offset = obj_surface->height; @@ -882,6 +1122,18 @@ i965_suface_external_memory(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +/* byte-per-pixel of the first plane */ +static int +bpp_1stplane_by_fourcc(unsigned int fourcc) +{ + const i965_fourcc_info *info = get_fourcc_info(fourcc); + + if (info && (info->flag & I_S)) + return info->bpp[0] / 8; + else + return 0; +} + static VAStatus i965_CreateSurfaces2( VADriverContextP ctx, @@ -904,24 +1156,26 @@ i965_CreateSurfaces2( for (i = 0; i < num_attribs && attrib_list; i++) { if ((attrib_list[i].type == VASurfaceAttribPixelFormat) && (attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) { - assert(attrib_list[i].value.type == VAGenericValueTypeInteger); + ASSERT_RET(attrib_list[i].value.type == VAGenericValueTypeInteger, VA_STATUS_ERROR_INVALID_PARAMETER); expected_fourcc = attrib_list[i].value.value.i; } if ((attrib_list[i].type == VASurfaceAttribMemoryType) && 
(attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) { - assert(attrib_list[i].value.type == VAGenericValueTypeInteger); + ASSERT_RET(attrib_list[i].value.type == VAGenericValueTypeInteger, VA_STATUS_ERROR_INVALID_PARAMETER); if (attrib_list[i].value.value.i == VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM) memory_type = I965_SURFACE_MEM_GEM_FLINK; /* flinked GEM handle */ else if (attrib_list[i].value.value.i == VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME) memory_type = I965_SURFACE_MEM_DRM_PRIME; /* drm prime fd */ + else if (attrib_list[i].value.value.i == VA_SURFACE_ATTRIB_MEM_TYPE_VA) + memory_type = I965_SURFACE_MEM_NATIVE; /* va native memory, to be allocated */ } if ((attrib_list[i].type == VASurfaceAttribExternalBufferDescriptor) && (attrib_list[i].flags == VA_SURFACE_ATTRIB_SETTABLE)) { - assert(attrib_list[i].value.type == VAGenericValueTypePointer); + ASSERT_RET(attrib_list[i].value.type == VAGenericValueTypePointer, VA_STATUS_ERROR_INVALID_PARAMETER); memory_attibute = (VASurfaceAttribExternalBuffers *)attrib_list[i].value.value.p; } } @@ -950,6 +1204,9 @@ i965_CreateSurfaces2( obj_surface->status = VASurfaceReady; obj_surface->orig_width = width; obj_surface->orig_height = height; + obj_surface->user_disable_tiling = false; + obj_surface->user_h_stride_set = false; + obj_surface->user_v_stride_set = false; obj_surface->subpic_render_idx = 0; for(j = 0; j < I965_MAX_SUBPIC_SUM; j++){ @@ -957,18 +1214,49 @@ i965_CreateSurfaces2( obj_surface->obj_subpic[j] = NULL; } - obj_surface->width = ALIGN(width, 16); - obj_surface->height = ALIGN(height, 16); + assert(i965->codec_info->min_linear_wpitch); + assert(i965->codec_info->min_linear_hpitch); + obj_surface->width = ALIGN(width, i965->codec_info->min_linear_wpitch); + obj_surface->height = ALIGN(height, i965->codec_info->min_linear_hpitch); obj_surface->flags = SURFACE_REFERENCED; obj_surface->fourcc = 0; obj_surface->bo = NULL; obj_surface->locked_image_id = VA_INVALID_ID; + obj_surface->derived_image_id = VA_INVALID_ID; 
obj_surface->private_data = NULL; obj_surface->free_private_data = NULL; obj_surface->subsampling = SUBSAMPLE_YUV420; switch (memory_type) { case I965_SURFACE_MEM_NATIVE: + if (memory_attibute) { + if (!(memory_attibute->flags & VA_SURFACE_EXTBUF_DESC_ENABLE_TILING)) + obj_surface->user_disable_tiling = true; + + if (memory_attibute->pixel_format) { + if (expected_fourcc) + ASSERT_RET(memory_attibute->pixel_format == expected_fourcc, VA_STATUS_ERROR_INVALID_PARAMETER); + else + expected_fourcc = memory_attibute->pixel_format; + } + ASSERT_RET(expected_fourcc, VA_STATUS_ERROR_INVALID_PARAMETER); + if (memory_attibute->pitches[0]) { + int bpp_1stplane = bpp_1stplane_by_fourcc(expected_fourcc); + ASSERT_RET(bpp_1stplane, VA_STATUS_ERROR_INVALID_PARAMETER); + obj_surface->width = memory_attibute->pitches[0]/bpp_1stplane; + obj_surface->user_h_stride_set = true; + ASSERT_RET(IS_ALIGNED(obj_surface->width, 16), VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(obj_surface->width >= width, VA_STATUS_ERROR_INVALID_PARAMETER); + + if (memory_attibute->offsets[1]) { + ASSERT_RET(!memory_attibute->offsets[0], VA_STATUS_ERROR_INVALID_PARAMETER); + obj_surface->height = memory_attibute->offsets[1]/memory_attibute->pitches[0]; + obj_surface->user_v_stride_set = true; + ASSERT_RET(IS_ALIGNED(obj_surface->height, 16), VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(obj_surface->height >= height, VA_STATUS_ERROR_INVALID_PARAMETER); + } + } + } i965_surface_native_memory(ctx, obj_surface, format, @@ -1030,7 +1318,7 @@ i965_DestroySurfaces(VADriverContextP ctx, for (i = num_surfaces; i--; ) { struct object_surface *obj_surface = SURFACE(surface_list[i]); - assert(obj_surface); + ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); i965_destroy_surface(&i965->surface_heap, (struct object_base *)obj_surface); } @@ -1073,7 +1361,7 @@ i965_guess_surface_format(VADriverContextP ctx, struct object_context *obj_context = NULL; struct object_config *obj_config = NULL; - *fourcc = 
VA_FOURCC('Y', 'V', '1', '2'); + *fourcc = VA_FOURCC_YV12; *is_tiled = 0; if (i965->current_context_id == VA_INVALID_ID) @@ -1090,8 +1378,10 @@ i965_guess_surface_format(VADriverContextP ctx, if (!obj_config) return; - if (IS_GEN6(i965->intel.device_id) || IS_GEN7(i965->intel.device_id)) { - *fourcc = VA_FOURCC('N', 'V', '1', '2'); + if (IS_GEN6(i965->intel.device_info) || + IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { + *fourcc = VA_FOURCC_NV12; *is_tiled = 1; return; } @@ -1099,12 +1389,12 @@ i965_guess_surface_format(VADriverContextP ctx, switch (obj_config->profile) { case VAProfileMPEG2Simple: case VAProfileMPEG2Main: - *fourcc = VA_FOURCC('I', '4', '2', '0'); + *fourcc = VA_FOURCC_I420; *is_tiled = 0; break; default: - *fourcc = VA_FOURCC('N', 'V', '1', '2'); + *fourcc = VA_FOURCC_NV12; *is_tiled = 0; break; } @@ -1183,7 +1473,7 @@ i965_DestroySubpicture(VADriverContextP ctx, if (!obj_subpic) return VA_STATUS_ERROR_INVALID_SUBPICTURE; - assert(obj_subpic->obj_image); + ASSERT_RET(obj_subpic->obj_image, VA_STATUS_ERROR_INVALID_SUBPICTURE); i965_destroy_subpic(&i965->subpic_heap, (struct object_base *)obj_subpic); return VA_STATUS_SUCCESS; } @@ -1254,7 +1544,7 @@ i965_AssociateSubpicture(VADriverContextP ctx, if (!obj_subpic) return VA_STATUS_ERROR_INVALID_SUBPICTURE; - assert(obj_subpic->obj_image); + ASSERT_RET(obj_subpic->obj_image, VA_STATUS_ERROR_INVALID_SUBPICTURE); obj_subpic->src_rect.x = src_x; obj_subpic->src_rect.y = src_y; @@ -1399,6 +1689,28 @@ i965_destroy_context(struct object_heap *heap, struct object_base *obj) i965_release_buffer_store(&obj_context->codec_state.encode.slice_params_ext[i]); free(obj_context->codec_state.encode.slice_params_ext); + if (obj_context->codec_state.encode.slice_rawdata_index) { + free(obj_context->codec_state.encode.slice_rawdata_index); + obj_context->codec_state.encode.slice_rawdata_index = NULL; + } + if (obj_context->codec_state.encode.slice_rawdata_count) { + 
free(obj_context->codec_state.encode.slice_rawdata_count); + obj_context->codec_state.encode.slice_rawdata_count = NULL; + } + + if (obj_context->codec_state.encode.slice_header_index) { + free(obj_context->codec_state.encode.slice_header_index); + obj_context->codec_state.encode.slice_header_index = NULL; + } + + for (i = 0; i < obj_context->codec_state.encode.num_packed_header_params_ext; i++) + i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_params_ext[i]); + free(obj_context->codec_state.encode.packed_header_params_ext); + + for (i = 0; i < obj_context->codec_state.encode.num_packed_header_data_ext; i++) + i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_data_ext[i]); + free(obj_context->codec_state.encode.packed_header_data_ext); + } else { assert(obj_context->codec_state.decode.num_slice_params <= obj_context->codec_state.decode.max_slice_params); assert(obj_context->codec_state.decode.num_slice_datas <= obj_context->codec_state.decode.max_slice_datas); @@ -1435,6 +1747,7 @@ i965_CreateContext(VADriverContextP ctx, struct i965_render_state *render_state = &i965->render_state; struct object_config *obj_config = CONFIG(config_id); struct object_context *obj_context = NULL; + VAConfigAttrib *attrib; VAStatus vaStatus = VA_STATUS_SUCCESS; int contextID; int i; @@ -1463,7 +1776,7 @@ i965_CreateContext(VADriverContextP ctx, render_state->inited = 1; switch (obj_config->profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: if (!HAS_H264_DECODING(i965) && @@ -1471,8 +1784,14 @@ i965_CreateContext(VADriverContextP ctx, return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; render_state->interleaved_uv = 1; break; + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + if (!HAS_H264_MVC_DECODING(i965)) + return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + render_state->interleaved_uv = 1; + break; default: - render_state->interleaved_uv = 
!!(IS_GEN6(i965->intel.device_id) || IS_GEN7(i965->intel.device_id)); + render_state->interleaved_uv = !!(IS_GEN6(i965->intel.device_info) || IS_GEN7(i965->intel.device_info) || IS_GEN8(i965->intel.device_info)); break; } @@ -1504,12 +1823,45 @@ i965_CreateContext(VADriverContextP ctx, assert(i965->codec_info->proc_hw_context_init); obj_context->hw_context = i965->codec_info->proc_hw_context_init(ctx, obj_config); } else if (VAEntrypointEncSlice == obj_config->entrypoint) { /*encode routin only*/ + VAConfigAttrib *packed_attrib; obj_context->codec_type = CODEC_ENC; memset(&obj_context->codec_state.encode, 0, sizeof(obj_context->codec_state.encode)); obj_context->codec_state.encode.current_render_target = VA_INVALID_ID; obj_context->codec_state.encode.max_slice_params = NUM_SLICES; obj_context->codec_state.encode.slice_params = calloc(obj_context->codec_state.encode.max_slice_params, sizeof(*obj_context->codec_state.encode.slice_params)); + obj_context->codec_state.encode.max_packed_header_params_ext = NUM_SLICES; + obj_context->codec_state.encode.packed_header_params_ext = + calloc(obj_context->codec_state.encode.max_packed_header_params_ext, + sizeof(struct buffer_store *)); + + obj_context->codec_state.encode.max_packed_header_data_ext = NUM_SLICES; + obj_context->codec_state.encode.packed_header_data_ext = + calloc(obj_context->codec_state.encode.max_packed_header_data_ext, + sizeof(struct buffer_store *)); + + obj_context->codec_state.encode.max_slice_num = NUM_SLICES; + obj_context->codec_state.encode.slice_rawdata_index = + calloc(obj_context->codec_state.encode.max_slice_num, sizeof(int)); + obj_context->codec_state.encode.slice_rawdata_count = + calloc(obj_context->codec_state.encode.max_slice_num, sizeof(int)); + + obj_context->codec_state.encode.slice_header_index = + calloc(obj_context->codec_state.encode.max_slice_num, sizeof(int)); + + obj_context->codec_state.encode.slice_index = 0; + packed_attrib = i965_lookup_config_attribute(obj_config, 
VAConfigAttribEncPackedHeaders); + if (packed_attrib) + obj_context->codec_state.encode.packed_header_flag = packed_attrib->value; + else { + /* use the default value. SPS/PPS/RAWDATA is passed from user + * while Slice_header data is generated by driver. + */ + obj_context->codec_state.encode.packed_header_flag = + VA_ENC_PACKED_HEADER_SEQUENCE | + VA_ENC_PACKED_HEADER_PICTURE | + VA_ENC_PACKED_HEADER_RAW_DATA; + } assert(i965->codec_info->enc_hw_context_init); obj_context->hw_context = i965->codec_info->enc_hw_context_init(ctx, obj_config); } else { @@ -1528,6 +1880,11 @@ i965_CreateContext(VADriverContextP ctx, } } + attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat); + if (!attrib) + return VA_STATUS_ERROR_INVALID_CONFIG; + obj_context->codec_state.base.chroma_formats = attrib->value; + /* Error recovery */ if (VA_STATUS_SUCCESS != vaStatus) { i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context); @@ -1544,7 +1901,7 @@ i965_DestroyContext(VADriverContextP ctx, VAContextID context) struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_context *obj_context = CONTEXT(context); - assert(obj_context); + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); if (i965->current_context_id == context) i965->current_context_id = VA_INVALID_ID; @@ -1602,6 +1959,7 @@ i965_create_buffer_internal(VADriverContextP ctx, case VAProcPipelineParameterBufferType: case VAProcFilterParameterBufferType: case VAHuffmanTableBufferType: + case VAProbabilityBufferType: /* Ok */ break; @@ -1625,6 +1983,7 @@ i965_create_buffer_internal(VADriverContextP ctx, obj_buffer->num_elements = num_elements; obj_buffer->size_element = size; obj_buffer->type = type; + obj_buffer->export_refcount = 0; obj_buffer->buffer_store = NULL; buffer_store = calloc(1, sizeof(struct buffer_store)); assert(buffer_store); @@ -1638,7 +1997,8 @@ i965_create_buffer_internal(VADriverContextP ctx, dri_bo_subdata(buffer_store->bo, 0, size * num_elements, 
data); } else if (type == VASliceDataBufferType || type == VAImageBufferType || - type == VAEncCodedBufferType) { + type == VAEncCodedBufferType || + type == VAProbabilityBufferType) { buffer_store->bo = dri_bo_alloc(i965->intel.bufmgr, "Buffer", size * num_elements, 64); @@ -1705,10 +2065,7 @@ i965_BufferSetNumElements(VADriverContextP ctx, struct object_buffer *obj_buffer = BUFFER(buf_id); VAStatus vaStatus = VA_STATUS_SUCCESS; - assert(obj_buffer); - - if (!obj_buffer) - return VA_STATUS_ERROR_INVALID_BUFFER; + ASSERT_RET(obj_buffer, VA_STATUS_ERROR_INVALID_BUFFER); if ((num_elements < 0) || (num_elements > obj_buffer->max_num_elements)) { @@ -1732,11 +2089,11 @@ i965_MapBuffer(VADriverContextP ctx, struct object_buffer *obj_buffer = BUFFER(buf_id); VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; - assert(obj_buffer && obj_buffer->buffer_store); - assert(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer); - assert(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer)); + ASSERT_RET(obj_buffer && obj_buffer->buffer_store, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer), VA_STATUS_ERROR_INVALID_BUFFER); - if (!obj_buffer || !obj_buffer->buffer_store) + if (obj_buffer->export_refcount > 0) return VA_STATUS_ERROR_INVALID_BUFFER; if (NULL != obj_buffer->buffer_store->bo) { @@ -1749,7 +2106,7 @@ i965_MapBuffer(VADriverContextP ctx, else dri_bo_map(obj_buffer->buffer_store->bo, 1); - assert(obj_buffer->buffer_store->bo->virtual); + ASSERT_RET(obj_buffer->buffer_store->bo->virtual, VA_STATUS_ERROR_OPERATION_FAILED); *pbuf = obj_buffer->buffer_store->bo->virtual; if (obj_buffer->type == VAEncCodedBufferType) { @@ -1762,20 +2119,21 @@ i965_MapBuffer(VADriverContextP ctx, coded_buffer_segment->base.buf = buffer = (unsigned char *)(obj_buffer->buffer_store->bo->virtual) + 
I965_CODEDBUFFER_HEADER_SIZE; - if (coded_buffer_segment->codec == CODED_H264) { + if (coded_buffer_segment->codec == CODEC_H264 || + coded_buffer_segment->codec == CODEC_H264_MVC) { delimiter0 = H264_DELIMITER0; delimiter1 = H264_DELIMITER1; delimiter2 = H264_DELIMITER2; delimiter3 = H264_DELIMITER3; delimiter4 = H264_DELIMITER4; - } else if (coded_buffer_segment->codec == CODED_MPEG2) { + } else if (coded_buffer_segment->codec == CODEC_MPEG2) { delimiter0 = MPEG2_DELIMITER0; delimiter1 = MPEG2_DELIMITER1; delimiter2 = MPEG2_DELIMITER2; delimiter3 = MPEG2_DELIMITER3; delimiter4 = MPEG2_DELIMITER4; } else { - assert(0); + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE); } for (i = 0; i < obj_buffer->size_element - I965_CODEDBUFFER_HEADER_SIZE - 3 - 0x1000; i++) { @@ -1814,13 +2172,13 @@ i965_UnmapBuffer(VADriverContextP ctx, VABufferID buf_id) struct object_buffer *obj_buffer = BUFFER(buf_id); VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; - assert(obj_buffer && obj_buffer->buffer_store); - assert(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer); - assert(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer)); - - if (!obj_buffer || !obj_buffer->buffer_store) + if ((buf_id & OBJECT_HEAP_OFFSET_MASK) != BUFFER_ID_OFFSET) return VA_STATUS_ERROR_INVALID_BUFFER; + ASSERT_RET(obj_buffer && obj_buffer->buffer_store, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->bo || obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_OPERATION_FAILED); + ASSERT_RET(!(obj_buffer->buffer_store->bo && obj_buffer->buffer_store->buffer), VA_STATUS_ERROR_OPERATION_FAILED); + if (NULL != obj_buffer->buffer_store->bo) { unsigned int tiling, swizzle; @@ -1846,10 +2204,7 @@ i965_DestroyBuffer(VADriverContextP ctx, VABufferID buffer_id) struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_buffer *obj_buffer = BUFFER(buffer_id); - assert(obj_buffer); - - if (!obj_buffer) - return VA_STATUS_ERROR_INVALID_BUFFER; + 
ASSERT_RET(obj_buffer, VA_STATUS_ERROR_INVALID_BUFFER); i965_destroy_buffer(&i965->buffer_heap, (struct object_base *)obj_buffer); @@ -1868,18 +2223,13 @@ i965_BeginPicture(VADriverContextP ctx, VAStatus vaStatus; int i; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; - - assert(obj_surface); - - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); + ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); obj_config = obj_context->obj_config; - assert(obj_config); + ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG); + + if (is_surface_busy(i965, obj_surface)) + return VA_STATUS_ERROR_SURFACE_BUSY; switch (obj_config->profile) { case VAProfileMPEG2Simple: @@ -1887,12 +2237,22 @@ i965_BeginPicture(VADriverContextP ctx, vaStatus = VA_STATUS_SUCCESS; break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: vaStatus = VA_STATUS_SUCCESS; break; + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: + if (HAS_H264_MVC_DECODING_PROFILE(i965, obj_config->profile) || + HAS_H264_MVC_ENCODING(i965)) { + vaStatus = VA_STATUS_SUCCESS; + } else { + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE); + } + break; + case VAProfileVC1Simple: case VAProfileVC1Main: case VAProfileVC1Advanced: @@ -1907,9 +2267,12 @@ i965_BeginPicture(VADriverContextP ctx, vaStatus = VA_STATUS_SUCCESS; break; + case VAProfileVP8Version0_3: + vaStatus = VA_STATUS_SUCCESS; + break; + default: - assert(0); - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE); break; } @@ -1939,6 +2302,20 @@ i965_BeginPicture(VADriverContextP ctx, obj_context->codec_state.encode.num_slice_params_ext = 0; obj_context->codec_state.encode.current_render_target = render_target; /*This is input new frame*/ obj_context->codec_state.encode.last_packed_header_type = 0; + 
memset(obj_context->codec_state.encode.slice_rawdata_index, 0, + sizeof(int) * obj_context->codec_state.encode.max_slice_num); + memset(obj_context->codec_state.encode.slice_rawdata_count, 0, + sizeof(int) * obj_context->codec_state.encode.max_slice_num); + memset(obj_context->codec_state.encode.slice_header_index, 0, + sizeof(int) * obj_context->codec_state.encode.max_slice_num); + + for (i = 0; i < obj_context->codec_state.encode.num_packed_header_params_ext; i++) + i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_params_ext[i]); + for (i = 0; i < obj_context->codec_state.encode.num_packed_header_data_ext; i++) + i965_release_buffer_store(&obj_context->codec_state.encode.packed_header_data_ext[i]); + obj_context->codec_state.encode.num_packed_header_params_ext = 0; + obj_context->codec_state.encode.num_packed_header_data_ext = 0; + obj_context->codec_state.encode.slice_index = 0; } else { obj_context->codec_state.decode.current_render_target = render_target; i965_release_buffer_store(&obj_context->codec_state.decode.pic_param); @@ -1967,8 +2344,6 @@ i965_BeginPicture(VADriverContextP ctx, struct object_buffer *obj_buffer) \ { \ struct category##_state *category = &obj_context->codec_state.category; \ - assert(obj_buffer->buffer_store->bo == NULL); \ - assert(obj_buffer->buffer_store->buffer); \ i965_release_buffer_store(&category->member); \ i965_reference_buffer_store(&category->member, obj_buffer->buffer_store); \ return VA_STATUS_SUCCESS; \ @@ -1999,6 +2374,7 @@ DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(picture_parameter, pic_param) DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(iq_matrix, iq_matrix) DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(bit_plane, bit_plane) DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(huffman_table, huffman_table) +DEF_RENDER_DECODE_SINGLE_BUFFER_FUNC(probability_data, probability_data) #define DEF_RENDER_DECODE_MULTI_BUFFER_FUNC(name, member) DEF_RENDER_MULTI_BUFFER_FUNC(decode, name, member) 
DEF_RENDER_DECODE_MULTI_BUFFER_FUNC(slice_parameter, slice_params) @@ -2015,14 +2391,10 @@ i965_decoder_render_picture(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_SUCCESS; int i; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); for (i = 0; i < num_buffers && vaStatus == VA_STATUS_SUCCESS; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); - assert(obj_buffer); if (!obj_buffer) return VA_STATUS_ERROR_INVALID_BUFFER; @@ -2052,6 +2424,10 @@ i965_decoder_render_picture(VADriverContextP ctx, vaStatus = I965_RENDER_DECODE_BUFFER(huffman_table); break; + case VAProbabilityBufferType: + vaStatus = I965_RENDER_DECODE_BUFFER(probability_data); + break; + default: vaStatus = VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE; break; @@ -2077,6 +2453,9 @@ DEF_RENDER_ENCODE_SINGLE_BUFFER_FUNC(picture_parameter_ext, pic_param_ext) // DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(slice_parameter, slice_params) DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(slice_parameter_ext, slice_params_ext) +DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(packed_header_params_ext, packed_header_params_ext) +DEF_RENDER_ENCODE_MULTI_BUFFER_FUNC(packed_header_data_ext, packed_header_data_ext) + static VAStatus i965_encoder_render_packed_header_parameter_buffer(VADriverContextP ctx, struct object_context *obj_context, @@ -2085,8 +2464,8 @@ i965_encoder_render_packed_header_parameter_buffer(VADriverContextP ctx, { struct encode_state *encode = &obj_context->codec_state.encode; - assert(obj_buffer->buffer_store->bo == NULL); - assert(obj_buffer->buffer_store->buffer); + ASSERT_RET(obj_buffer->buffer_store->bo == NULL, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER); i965_release_buffer_store(&encode->packed_header_param[type_index]); i965_reference_buffer_store(&encode->packed_header_param[type_index], obj_buffer->buffer_store); @@ -2101,8 +2480,8 @@ 
i965_encoder_render_packed_header_data_buffer(VADriverContextP ctx, { struct encode_state *encode = &obj_context->codec_state.encode; - assert(obj_buffer->buffer_store->bo == NULL); - assert(obj_buffer->buffer_store->buffer); + ASSERT_RET(obj_buffer->buffer_store->bo == NULL, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER); i965_release_buffer_store(&encode->packed_header_data[type_index]); i965_reference_buffer_store(&encode->packed_header_data[type_index], obj_buffer->buffer_store); @@ -2117,10 +2496,14 @@ i965_encoder_render_misc_parameter_buffer(VADriverContextP ctx, struct encode_state *encode = &obj_context->codec_state.encode; VAEncMiscParameterBuffer *param = NULL; - assert(obj_buffer->buffer_store->bo == NULL); - assert(obj_buffer->buffer_store->buffer); + ASSERT_RET(obj_buffer->buffer_store->bo == NULL, VA_STATUS_ERROR_INVALID_BUFFER); + ASSERT_RET(obj_buffer->buffer_store->buffer, VA_STATUS_ERROR_INVALID_BUFFER); param = (VAEncMiscParameterBuffer *)obj_buffer->buffer_store->buffer; + + if (param->type >= ARRAY_ELEMS(encode->misc_param)) + return VA_STATUS_ERROR_INVALID_PARAMETER; + i965_release_buffer_store(&encode->misc_param[param->type]); i965_reference_buffer_store(&encode->misc_param[param->type], obj_buffer->buffer_store); @@ -2136,16 +2519,14 @@ i965_encoder_render_picture(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_context *obj_context = CONTEXT(context); VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; + struct encode_state *encode; int i; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); + encode = &obj_context->codec_state.encode; for (i = 0; i < num_buffers; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); - assert(obj_buffer); if (!obj_buffer) return VA_STATUS_ERROR_INVALID_BUFFER; @@ -2169,34 +2550,141 @@ 
i965_encoder_render_picture(VADriverContextP ctx, case VAEncSliceParameterBufferType: vaStatus = I965_RENDER_ENCODE_BUFFER(slice_parameter_ext); + if (vaStatus == VA_STATUS_SUCCESS) { + /* When the max number of slices is updated, it also needs + * to reallocate the arrays that is used to store + * the packed data index/count for the slice + */ + if (!(encode->packed_header_flag & VA_ENC_PACKED_HEADER_SLICE)) { + encode->slice_index++; + } + if (encode->slice_index == encode->max_slice_num) { + int slice_num = encode->max_slice_num; + encode->slice_rawdata_index = realloc(encode->slice_rawdata_index, + (slice_num + NUM_SLICES) * sizeof(int)); + encode->slice_rawdata_count = realloc(encode->slice_rawdata_count, + (slice_num + NUM_SLICES) * sizeof(int)); + encode->slice_header_index = realloc(encode->slice_header_index, + (slice_num + NUM_SLICES) * sizeof(int)); + memset(encode->slice_rawdata_index + slice_num, 0, + sizeof(int) * NUM_SLICES); + memset(encode->slice_rawdata_count + slice_num, 0, + sizeof(int) * NUM_SLICES); + memset(encode->slice_header_index + slice_num, 0, + sizeof(int) * NUM_SLICES); + + encode->max_slice_num += NUM_SLICES; + if ((encode->slice_rawdata_index == NULL) || + (encode->slice_header_index == NULL) || + (encode->slice_rawdata_count == NULL)) { + vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED; + return vaStatus; + } + } + } break; case VAEncPackedHeaderParameterBufferType: { - struct encode_state *encode = &obj_context->codec_state.encode; VAEncPackedHeaderParameterBuffer *param = (VAEncPackedHeaderParameterBuffer *)obj_buffer->buffer_store->buffer; encode->last_packed_header_type = param->type; - vaStatus = i965_encoder_render_packed_header_parameter_buffer(ctx, + if ((param->type == VAEncPackedHeaderRawData) || + (param->type == VAEncPackedHeaderSlice)) { + vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_params_ext); + } else { + vaStatus = i965_encoder_render_packed_header_parameter_buffer(ctx, obj_context, obj_buffer, 
va_enc_packed_type_to_idx(encode->last_packed_header_type)); + } break; } case VAEncPackedHeaderDataBufferType: { - struct encode_state *encode = &obj_context->codec_state.encode; - - assert(encode->last_packed_header_type == VAEncPackedHeaderSequence || - encode->last_packed_header_type == VAEncPackedHeaderPicture || - encode->last_packed_header_type == VAEncPackedHeaderSlice || + if (encode->last_packed_header_type == 0) { + WARN_ONCE("the packed header data is passed without type!\n"); + vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER; + return vaStatus; + } + if (encode->last_packed_header_type == VAEncPackedHeaderRawData || + encode->last_packed_header_type == VAEncPackedHeaderSlice) { + vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_data_ext); + + /* When the PACKED_SLICE_HEADER flag is passed, it will use + * the packed_slice_header as the delimeter to decide how + * the packed rawdata is inserted for the given slice. + * Otherwise it will use the VAEncSequenceParameterBuffer + * as the delimeter + */ + if (encode->packed_header_flag & VA_ENC_PACKED_HEADER_SLICE) { + /* store the first index of the packed header data for current slice */ + if (encode->slice_rawdata_index[encode->slice_index] == 0) { + encode->slice_rawdata_index[encode->slice_index] = + SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); + } + encode->slice_rawdata_count[encode->slice_index]++; + if (encode->last_packed_header_type == VAEncPackedHeaderSlice) { + /* find one packed slice_header delimeter. And the following + * packed data is for the next slice + */ + encode->slice_header_index[encode->slice_index] = + SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); + encode->slice_index++; + /* Reallocate the buffer to record the index/count of + * packed_data for one slice. 
+ */ + if (encode->slice_index == encode->max_slice_num) { + int slice_num = encode->max_slice_num; + + encode->slice_rawdata_index = realloc(encode->slice_rawdata_index, + (slice_num + NUM_SLICES) * sizeof(int)); + encode->slice_rawdata_count = realloc(encode->slice_rawdata_count, + (slice_num + NUM_SLICES) * sizeof(int)); + encode->slice_header_index = realloc(encode->slice_header_index, + (slice_num + NUM_SLICES) * sizeof(int)); + memset(encode->slice_rawdata_index + slice_num, 0, + sizeof(int) * NUM_SLICES); + memset(encode->slice_rawdata_count + slice_num, 0, + sizeof(int) * NUM_SLICES); + memset(encode->slice_header_index + slice_num, 0, + sizeof(int) * NUM_SLICES); + encode->max_slice_num += NUM_SLICES; + } + } + } else { + if (vaStatus == VA_STATUS_SUCCESS) { + /* store the first index of the packed header data for current slice */ + if (encode->slice_rawdata_index[encode->slice_index] == 0) { + encode->slice_rawdata_index[encode->slice_index] = + SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); + } + encode->slice_rawdata_count[encode->slice_index]++; + if (encode->last_packed_header_type == VAEncPackedHeaderSlice) { + if (encode->slice_header_index[encode->slice_index] == 0) { + encode->slice_header_index[encode->slice_index] = + SLICE_PACKED_DATA_INDEX_TYPE | (encode->num_packed_header_data_ext - 1); + } else { + WARN_ONCE("Multi slice header data is passed for" + " slice %d!\n", encode->slice_index); + } + } + } + } + } else { + ASSERT_RET(encode->last_packed_header_type == VAEncPackedHeaderSequence || + encode->last_packed_header_type == VAEncPackedHeaderPicture || + encode->last_packed_header_type == VAEncPackedHeaderSlice || (((encode->last_packed_header_type & VAEncPackedHeaderMiscMask) == VAEncPackedHeaderMiscMask) && - ((encode->last_packed_header_type & (~VAEncPackedHeaderMiscMask)) != 0))); - vaStatus = i965_encoder_render_packed_header_data_buffer(ctx, + ((encode->last_packed_header_type & (~VAEncPackedHeaderMiscMask)) 
!= 0)), + VA_STATUS_ERROR_ENCODING_ERROR); + vaStatus = i965_encoder_render_packed_header_data_buffer(ctx, obj_context, obj_buffer, va_enc_packed_type_to_idx(encode->last_packed_header_type)); + } + encode->last_packed_header_type = 0; break; } @@ -2231,14 +2719,10 @@ i965_proc_render_picture(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_SUCCESS; int i; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); for (i = 0; i < num_buffers && vaStatus == VA_STATUS_SUCCESS; i++) { struct object_buffer *obj_buffer = BUFFER(buffers[i]); - assert(obj_buffer); if (!obj_buffer) return VA_STATUS_ERROR_INVALID_BUFFER; @@ -2269,13 +2753,13 @@ i965_RenderPicture(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; obj_context = CONTEXT(context); - assert(obj_context); + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); + + if (num_buffers <= 0) + return VA_STATUS_ERROR_INVALID_PARAMETER; - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; - obj_config = obj_context->obj_config; - assert(obj_config); + ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG); if (VAEntrypointVideoProc == obj_config->entrypoint) { vaStatus = i965_proc_render_picture(ctx, context, buffers, num_buffers); @@ -2295,33 +2779,58 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context) struct object_context *obj_context = CONTEXT(context); struct object_config *obj_config; - assert(obj_context); - - if (!obj_context) - return VA_STATUS_ERROR_INVALID_CONTEXT; - + ASSERT_RET(obj_context, VA_STATUS_ERROR_INVALID_CONTEXT); obj_config = obj_context->obj_config; - assert(obj_config); + ASSERT_RET(obj_config, VA_STATUS_ERROR_INVALID_CONFIG); if (obj_context->codec_type == CODEC_PROC) { - assert(VAEntrypointVideoProc == obj_config->entrypoint); + ASSERT_RET(VAEntrypointVideoProc == obj_config->entrypoint, VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT); } else if (obj_context->codec_type == 
CODEC_ENC) { - assert(VAEntrypointEncSlice == obj_config->entrypoint); - - assert(obj_context->codec_state.encode.pic_param || - obj_context->codec_state.encode.pic_param_ext); - assert(obj_context->codec_state.encode.seq_param || - obj_context->codec_state.encode.seq_param_ext); - assert(obj_context->codec_state.encode.num_slice_params >= 1 || - obj_context->codec_state.encode.num_slice_params_ext >= 1); + ASSERT_RET(VAEntrypointEncSlice == obj_config->entrypoint, VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT); + + if (obj_context->codec_state.encode.num_packed_header_params_ext != + obj_context->codec_state.encode.num_packed_header_data_ext) { + WARN_ONCE("the packed header/data is not paired for encoding!\n"); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + if (!(obj_context->codec_state.encode.pic_param || + obj_context->codec_state.encode.pic_param_ext)) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + if (!(obj_context->codec_state.encode.seq_param || + obj_context->codec_state.encode.seq_param_ext)) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + if ((obj_context->codec_state.encode.num_slice_params <=0) && + (obj_context->codec_state.encode.num_slice_params_ext <=0)) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + + if ((obj_context->codec_state.encode.packed_header_flag & VA_ENC_PACKED_HEADER_SLICE) && + (obj_context->codec_state.encode.num_slice_params_ext != + obj_context->codec_state.encode.slice_index)) { + WARN_ONCE("packed slice_header data is missing for some slice" + " under packed SLICE_HEADER mode\n"); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } } else { - assert(obj_context->codec_state.decode.pic_param); - assert(obj_context->codec_state.decode.num_slice_params >= 1); - assert(obj_context->codec_state.decode.num_slice_datas >= 1); - assert(obj_context->codec_state.decode.num_slice_params == obj_context->codec_state.decode.num_slice_datas); + if (obj_context->codec_state.decode.pic_param == NULL) { + return 
VA_STATUS_ERROR_INVALID_PARAMETER; + } + if (obj_context->codec_state.decode.num_slice_params <=0) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + if (obj_context->codec_state.decode.num_slice_datas <=0) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + + if (obj_context->codec_state.decode.num_slice_params != + obj_context->codec_state.decode.num_slice_datas) { + return VA_STATUS_ERROR_INVALID_PARAMETER; + } } - assert(obj_context->hw_context->run); + ASSERT_RET(obj_context->hw_context->run, VA_STATUS_ERROR_OPERATION_FAILED); return obj_context->hw_context->run(ctx, obj_config->profile, &obj_context->codec_state, obj_context->hw_context); } @@ -2332,7 +2841,7 @@ i965_SyncSurface(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_surface *obj_surface = SURFACE(render_target); - assert(obj_surface); + ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); if(obj_surface->bo) drm_intel_bo_wait_rendering(obj_surface->bo); @@ -2348,7 +2857,7 @@ i965_QuerySurfaceStatus(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct object_surface *obj_surface = SURFACE(render_target); - assert(obj_surface); + ASSERT_RET(obj_surface, VA_STATUS_ERROR_INVALID_SURFACE); if (obj_surface->bo) { if (drm_intel_bo_busy(obj_surface->bo)){ @@ -2410,7 +2919,16 @@ i965_display_attributes_init(VADriverContextP ctx) ); i965->rotation_attrib = get_display_attribute(ctx, VADisplayAttribRotation); - if (!i965->rotation_attrib) { + i965->brightness_attrib = get_display_attribute(ctx, VADisplayAttribBrightness); + i965->contrast_attrib = get_display_attribute(ctx, VADisplayAttribContrast); + i965->hue_attrib = get_display_attribute(ctx, VADisplayAttribHue); + i965->saturation_attrib = get_display_attribute(ctx, VADisplayAttribSaturation); + + if (!i965->rotation_attrib || + !i965->brightness_attrib || + !i965->contrast_attrib || + !i965->hue_attrib || + !i965->saturation_attrib) { goto error; } return true; @@ -2553,7 
+3071,7 @@ i965_CreateImage(VADriverContextP ctx, struct object_image *obj_image; VAStatus va_status = VA_STATUS_ERROR_OPERATION_FAILED; VAImageID image_id; - unsigned int width2, height2, size2, size; + unsigned int size2, size, awidth, aheight; out_image->image_id = VA_INVALID_ID; out_image->buf = VA_INVALID_ID; @@ -2573,83 +3091,101 @@ i965_CreateImage(VADriverContextP ctx, image->image_id = image_id; image->buf = VA_INVALID_ID; - size = width * height; - width2 = (width + 1) / 2; - height2 = (height + 1) / 2; - size2 = width2 * height2; + awidth = ALIGN(width, i965->codec_info->min_linear_wpitch); + + if ((format->fourcc == VA_FOURCC_YV12) || + (format->fourcc == VA_FOURCC_I420)) { + if (awidth % 128 != 0) { + awidth = ALIGN(width, 128); + } + } + + aheight = ALIGN(height, i965->codec_info->min_linear_hpitch); + size = awidth * aheight; + size2 = (awidth / 2) * (aheight / 2); image->num_palette_entries = 0; image->entry_bytes = 0; memset(image->component_order, 0, sizeof(image->component_order)); switch (format->fourcc) { - case VA_FOURCC('I','A','4','4'): - case VA_FOURCC('A','I','4','4'): + case VA_FOURCC_IA44: + case VA_FOURCC_AI44: image->num_planes = 1; - image->pitches[0] = width; + image->pitches[0] = awidth; image->offsets[0] = 0; - image->data_size = image->offsets[0] + image->pitches[0] * height; + image->data_size = image->offsets[0] + image->pitches[0] * aheight; image->num_palette_entries = 16; image->entry_bytes = 3; image->component_order[0] = 'R'; image->component_order[1] = 'G'; image->component_order[2] = 'B'; break; - case VA_FOURCC('I','A','8','8'): - case VA_FOURCC('A','I','8','8'): + case VA_FOURCC_IA88: + case VA_FOURCC_AI88: image->num_planes = 1; - image->pitches[0] = width * 2; + image->pitches[0] = awidth * 2; image->offsets[0] = 0; - image->data_size = image->offsets[0] + image->pitches[0] * height; + image->data_size = image->offsets[0] + image->pitches[0] * aheight; image->num_palette_entries = 256; image->entry_bytes = 3; 
image->component_order[0] = 'R'; image->component_order[1] = 'G'; image->component_order[2] = 'B'; break; - case VA_FOURCC('A','R','G','B'): - case VA_FOURCC('A','B','G','R'): - case VA_FOURCC('B','G','R','A'): - case VA_FOURCC('R','G','B','A'): - case VA_FOURCC('B','G','R','X'): - case VA_FOURCC('R','G','B','X'): + case VA_FOURCC_ARGB: + case VA_FOURCC_ABGR: + case VA_FOURCC_BGRA: + case VA_FOURCC_RGBA: + case VA_FOURCC_BGRX: + case VA_FOURCC_RGBX: image->num_planes = 1; - image->pitches[0] = width * 4; + image->pitches[0] = awidth * 4; image->offsets[0] = 0; - image->data_size = image->offsets[0] + image->pitches[0] * height; + image->data_size = image->offsets[0] + image->pitches[0] * aheight; break; - case VA_FOURCC('Y','V','1','2'): + case VA_FOURCC_YV12: image->num_planes = 3; - image->pitches[0] = width; + image->pitches[0] = awidth; image->offsets[0] = 0; - image->pitches[1] = width2; - image->offsets[1] = size + size2; - image->pitches[2] = width2; - image->offsets[2] = size; + image->pitches[1] = awidth / 2; + image->offsets[1] = size; + image->pitches[2] = awidth / 2; + image->offsets[2] = size + size2; image->data_size = size + 2 * size2; break; - case VA_FOURCC('I','4','2','0'): + case VA_FOURCC_I420: image->num_planes = 3; - image->pitches[0] = width; + image->pitches[0] = awidth; image->offsets[0] = 0; - image->pitches[1] = width2; + image->pitches[1] = awidth / 2; image->offsets[1] = size; - image->pitches[2] = width2; + image->pitches[2] = awidth / 2; image->offsets[2] = size + size2; image->data_size = size + 2 * size2; break; - case VA_FOURCC('N','V','1','2'): + case VA_FOURCC_422H: + image->num_planes = 3; + image->pitches[0] = awidth; + image->offsets[0] = 0; + image->pitches[1] = awidth / 2; + image->offsets[1] = size; + image->pitches[2] = awidth / 2; + image->offsets[2] = size + (awidth / 2) * aheight; + image->data_size = size + 2 * ((awidth / 2) * aheight); + break; + case VA_FOURCC_NV12: image->num_planes = 2; - image->pitches[0] = width; 
+ image->pitches[0] = awidth; image->offsets[0] = 0; - image->pitches[1] = width; + image->pitches[1] = awidth; image->offsets[1] = size; image->data_size = size + 2 * size2; break; - case VA_FOURCC('Y','U','Y','2'): - case VA_FOURCC('U','Y','V','Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: image->num_planes = 1; - image->pitches[0] = width * 2; + image->pitches[0] = awidth * 2; image->offsets[0] = 0; image->data_size = size * 2; break; @@ -2691,7 +3227,7 @@ i965_CreateImage(VADriverContextP ctx, return va_status; } -void +VAStatus i965_check_alloc_surface_bo(VADriverContextP ctx, struct object_surface *obj_surface, int tiled, @@ -2702,26 +3238,34 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, int region_width, region_height; if (obj_surface->bo) { - assert(obj_surface->fourcc); - assert(obj_surface->fourcc == fourcc); - assert(obj_surface->subsampling == subsampling); - return; + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(obj_surface->fourcc == fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(obj_surface->subsampling == subsampling, VA_STATUS_ERROR_INVALID_SURFACE); + return VA_STATUS_SUCCESS; } obj_surface->x_cb_offset = 0; /* X offset is always 0 */ obj_surface->x_cr_offset = 0; - if (tiled) { - assert(fourcc != VA_FOURCC('I', '4', '2', '0') && - fourcc != VA_FOURCC('I', 'Y', 'U', 'V') && - fourcc != VA_FOURCC('Y', 'V', '1', '2')); + if ((tiled && !obj_surface->user_disable_tiling)) { + ASSERT_RET(fourcc != VA_FOURCC_I420 && + fourcc != VA_FOURCC_IYUV && + fourcc != VA_FOURCC_YV12, + VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT); + if (obj_surface->user_h_stride_set) { + ASSERT_RET(IS_ALIGNED(obj_surface->width, 128), VA_STATUS_ERROR_INVALID_PARAMETER); + } else + obj_surface->width = ALIGN(obj_surface->orig_width, 128); + + if (obj_surface->user_v_stride_set) { + ASSERT_RET(IS_ALIGNED(obj_surface->height, 32), VA_STATUS_ERROR_INVALID_PARAMETER); + } else + obj_surface->height = ALIGN(obj_surface->orig_height, 32); 
- obj_surface->width = ALIGN(obj_surface->orig_width, 128); - obj_surface->height = ALIGN(obj_surface->orig_height, 32); region_height = obj_surface->height; switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: assert(subsampling == SUBSAMPLE_YUV420); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2733,7 +3277,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('I', 'M', 'C', '1'): + case VA_FOURCC_IMC1: assert(subsampling == SUBSAMPLE_YUV420); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2745,7 +3289,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('I', 'M', 'C', '3'): + case VA_FOURCC_IMC3: assert(subsampling == SUBSAMPLE_YUV420); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2757,7 +3301,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('4', '2', '2', 'H'): + case VA_FOURCC_422H: assert(subsampling == SUBSAMPLE_YUV422H); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2769,7 +3313,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('4', '2', '2', 'V'): + case VA_FOURCC_422V: assert(subsampling == SUBSAMPLE_YUV422V); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width; @@ -2781,7 +3325,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('4', '1', '1', 'P'): + case VA_FOURCC_411P: assert(subsampling == SUBSAMPLE_YUV411); obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width / 4; @@ -2793,7 +3337,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('4', '4', '4', 'P'): + case VA_FOURCC_444P: assert(subsampling == SUBSAMPLE_YUV444); 
obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = obj_surface->orig_width; @@ -2805,44 +3349,46 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, break; - case VA_FOURCC('Y', '8', '0', '0'): + case VA_FOURCC_Y800: assert(subsampling == SUBSAMPLE_YUV400); - obj_surface->cb_cr_pitch = obj_surface->width; + obj_surface->cb_cr_pitch = 0; obj_surface->cb_cr_width = 0; obj_surface->cb_cr_height = 0; - obj_surface->y_cb_offset = obj_surface->height; - obj_surface->y_cr_offset = obj_surface->y_cb_offset + ALIGN(obj_surface->cb_cr_height, 32); + obj_surface->y_cb_offset = 0; + obj_surface->y_cr_offset = 0; region_width = obj_surface->width; - region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32) * 2; + region_height = obj_surface->height; break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: assert(subsampling == SUBSAMPLE_YUV422H); - obj_surface->cb_cr_pitch = obj_surface->width * 2; + obj_surface->width = ALIGN(obj_surface->orig_width * 2, 128); + obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->y_cb_offset = 0; obj_surface->y_cr_offset = 0; obj_surface->cb_cr_width = obj_surface->orig_width / 2; obj_surface->cb_cr_height = obj_surface->orig_height / 2; - region_width = obj_surface->width * 2; + region_width = obj_surface->width; region_height = obj_surface->height; break; - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: assert(subsampling == SUBSAMPLE_RGBX); - region_width = obj_surface->width * 4; + obj_surface->width = ALIGN(obj_surface->orig_width * 4, 128); + region_width = obj_surface->width; region_height = obj_surface->height; break; default: /* Never get here */ - assert(0); + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT); break; } } 
else { @@ -2855,7 +3401,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_height = obj_surface->height; switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: obj_surface->y_cb_offset = obj_surface->height; obj_surface->y_cr_offset = obj_surface->height; obj_surface->cb_cr_width = obj_surface->orig_width / 2; @@ -2864,9 +3410,18 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_height = obj_surface->height + obj_surface->height / 2; break; - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - if (fourcc == VA_FOURCC('Y', 'V', '1', '2')) { + case VA_FOURCC_YV16: + obj_surface->cb_cr_width = obj_surface->orig_width / 2; + obj_surface->cb_cr_height = obj_surface->orig_height; + obj_surface->y_cr_offset = obj_surface->height; + obj_surface->y_cb_offset = obj_surface->y_cr_offset + ALIGN(obj_surface->cb_cr_height, 32) / 2; + obj_surface->cb_cr_pitch = obj_surface->width / 2; + region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32); + break; + + case VA_FOURCC_YV12: + case VA_FOURCC_I420: + if (fourcc == VA_FOURCC_YV12) { obj_surface->y_cr_offset = obj_surface->height; obj_surface->y_cb_offset = obj_surface->height + obj_surface->height / 4; } else { @@ -2880,34 +3435,36 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, region_height = obj_surface->height + obj_surface->height / 2; break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: + obj_surface->width = ALIGN(obj_surface->orig_width * 2, i965->codec_info->min_linear_wpitch); obj_surface->y_cb_offset = 0; obj_surface->y_cr_offset = 0; obj_surface->cb_cr_width = obj_surface->orig_width / 2; obj_surface->cb_cr_height = obj_surface->orig_height; - obj_surface->cb_cr_pitch = obj_surface->width * 2; - region_width = obj_surface->width * 2; + obj_surface->cb_cr_pitch = obj_surface->width; + region_width = obj_surface->width; region_height = 
obj_surface->height; break; - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): - region_width = obj_surface->width * 4; + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + obj_surface->width = ALIGN(obj_surface->orig_width * 4, i965->codec_info->min_linear_wpitch); + region_width = obj_surface->width; region_height = obj_surface->height; break; default: /* Never get here */ - assert(0); + ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT); break; } } obj_surface->size = ALIGN(region_width * region_height, 0x1000); - if (tiled) { + if ((tiled && !obj_surface->user_disable_tiling)) { uint32_t tiling_mode = I915_TILING_Y; /* always uses Y-tiled format */ unsigned long pitch; @@ -2920,9 +3477,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, &pitch, 0); assert(tiling_mode == I915_TILING_Y); - assert(pitch == obj_surface->width || - pitch == obj_surface->width * 2 || - pitch == obj_surface->width * 4) ; + assert(pitch == obj_surface->width); } else { obj_surface->bo = dri_bo_alloc(i965->intel.bufmgr, "vaapi surface", @@ -2933,6 +3488,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, obj_surface->fourcc = fourcc; obj_surface->subsampling = subsampling; assert(obj_surface->bo); + return VA_STATUS_SUCCESS; } VAStatus i965_DeriveImage(VADriverContextP ctx, @@ -2954,13 +3510,15 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, if (!obj_surface->bo) { unsigned int is_tiled = 0; - unsigned int fourcc = VA_FOURCC('Y', 'V', '1', '2'); + unsigned int fourcc = VA_FOURCC_YV12; i965_guess_surface_format(ctx, surface, &fourcc, &is_tiled); int sampling = get_sampling_from_fourcc(fourcc); - i965_check_alloc_surface_bo(ctx, obj_surface, is_tiled, fourcc, sampling); + va_status = i965_check_alloc_surface_bo(ctx, obj_surface, is_tiled, fourcc, sampling); + if (va_status != VA_STATUS_SUCCESS) + return va_status; } - 
assert(obj_surface->fourcc); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); w_pitch = obj_surface->width; @@ -2994,7 +3552,17 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->format.bits_per_pixel = 12; switch (image->format.fourcc) { - case VA_FOURCC('Y', 'V', '1', '2'): + case VA_FOURCC_YV12: + image->num_planes = 3; + image->pitches[0] = w_pitch; /* Y */ + image->offsets[0] = 0; + image->pitches[1] = obj_surface->cb_cr_pitch; /* V */ + image->offsets[1] = w_pitch * obj_surface->y_cr_offset; + image->pitches[2] = obj_surface->cb_cr_pitch; /* U */ + image->offsets[2] = w_pitch * obj_surface->y_cb_offset; + break; + + case VA_FOURCC_YV16: image->num_planes = 3; image->pitches[0] = w_pitch; /* Y */ image->offsets[0] = 0; @@ -3004,7 +3572,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->offsets[2] = w_pitch * obj_surface->y_cb_offset; break; - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: image->num_planes = 2; image->pitches[0] = w_pitch; /* Y */ image->offsets[0] = 0; @@ -3012,7 +3580,12 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->offsets[1] = w_pitch * obj_surface->y_cb_offset; break; - case VA_FOURCC('I', '4', '2', '0'): + case VA_FOURCC_I420: + case VA_FOURCC_422H: + case VA_FOURCC_IMC3: + case VA_FOURCC_444P: + case VA_FOURCC_422V: + case VA_FOURCC_411P: image->num_planes = 3; image->pitches[0] = w_pitch; /* Y */ image->offsets[0] = 0; @@ -3021,21 +3594,20 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->pitches[2] = obj_surface->cb_cr_pitch; /* V */ image->offsets[2] = w_pitch * obj_surface->y_cr_offset; break; - case VA_FOURCC('Y', 'U', 'Y', '2'): + + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: + case VA_FOURCC_Y800: image->num_planes = 1; - image->pitches[0] = obj_surface->width * 2; /* Y, width is aligned already */ + image->pitches[0] = obj_surface->width; /* Y, width is aligned already */ image->offsets[0] = 0; - image->pitches[1] = obj_surface->width * 2; /* U */ - 
image->offsets[1] = 0; - image->pitches[2] = obj_surface->width * 2; /* V */ - image->offsets[2] = 0; break; - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: image->num_planes = 1; - image->pitches[0] = obj_surface->width * 4; + image->pitches[0] = obj_surface->width; break; default: goto error; @@ -3066,6 +3638,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, *out_image = *image; obj_surface->flags |= SURFACE_DERIVED; + obj_surface->derived_image_id = image_id; obj_image->derived_surface = surface; return VA_STATUS_SUCCESS; @@ -3109,6 +3682,7 @@ i965_DestroyImage(VADriverContextP ctx, VAImageID image) if (obj_surface) { obj_surface->flags &= ~SURFACE_DERIVED; + obj_surface->derived_image_id = VA_INVALID_ID; } i965_destroy_image(&i965->image_heap, (struct object_base *)obj_image); @@ -3146,51 +3720,12 @@ i965_SetImagePalette(VADriverContextP ctx, static int get_sampling_from_fourcc(unsigned int fourcc) { - int surface_sampling = -1; - - switch (fourcc) { - case VA_FOURCC('N', 'V', '1', '2'): - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('I', 'Y', 'U', 'V'): - case VA_FOURCC('I', 'M', 'C', '1'): - case VA_FOURCC('I', 'M', 'C', '3'): - surface_sampling = SUBSAMPLE_YUV420; - break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): - case VA_FOURCC('4', '2', '2', 'H'): - surface_sampling = SUBSAMPLE_YUV422H; - break; - case VA_FOURCC('4', '2', '2', 'V'): - surface_sampling = SUBSAMPLE_YUV422V; - break; - - case VA_FOURCC('4', '4', '4', 'P'): - surface_sampling = SUBSAMPLE_YUV444; - break; - - case VA_FOURCC('4', '1', '1', 'P'): - surface_sampling = SUBSAMPLE_YUV411; - break; - - case VA_FOURCC('Y', '8', '0', '0'): - surface_sampling = SUBSAMPLE_YUV400; - break; - case VA_FOURCC('R','G','B','A'): - case 
VA_FOURCC('R','G','B','X'): - case VA_FOURCC('B','G','R','A'): - case VA_FOURCC('B','G','R','X'): - surface_sampling = SUBSAMPLE_RGBX; - break; - default: - /* Never get here */ - assert(0); - break; - - } + const i965_fourcc_info *info = get_fourcc_info(fourcc); - return surface_sampling; + if (info && (info->flag & I_S)) + return info->subsampling; + else + return -1; } static inline void @@ -3207,7 +3742,7 @@ memcpy_pic(uint8_t *dst, unsigned int dst_stride, } } -static void +static VAStatus get_image_i420(struct object_image *obj_image, uint8_t *image_data, struct object_surface *obj_surface, const VARectangle *rect) @@ -3217,11 +3752,12 @@ get_image_i420(struct object_image *obj_image, uint8_t *image_data, const int U = obj_image->image.format.fourcc == obj_surface->fourcc ? 1 : 2; const int V = obj_image->image.format.fourcc == obj_surface->fourcc ? 2 : 1; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; if (!obj_surface->bo) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; - assert(obj_surface->fourcc); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); if (tiling != I915_TILING_NONE) @@ -3230,7 +3766,7 @@ get_image_i420(struct object_image *obj_image, uint8_t *image_data, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Dest VA image has either I420 or YV12 format. 
Source VA surface alway has I420 format */ @@ -3266,18 +3802,21 @@ get_image_i420(struct object_image *obj_image, uint8_t *image_data, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } -static void +static VAStatus get_image_nv12(struct object_image *obj_image, uint8_t *image_data, struct object_surface *obj_surface, const VARectangle *rect) { uint8_t *dst[2], *src[2]; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; if (!obj_surface->bo) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; assert(obj_surface->fourcc); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); @@ -3288,7 +3827,7 @@ get_image_nv12(struct object_image *obj_image, uint8_t *image_data, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Both dest VA image and source surface have NV12 format */ dst[0] = image_data + obj_image->image.offsets[0]; @@ -3314,18 +3853,21 @@ get_image_nv12(struct object_image *obj_image, uint8_t *image_data, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } -static void +static VAStatus get_image_yuy2(struct object_image *obj_image, uint8_t *image_data, struct object_surface *obj_surface, const VARectangle *rect) { uint8_t *dst, *src; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; if (!obj_surface->bo) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; assert(obj_surface->fourcc); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); @@ -3336,7 +3878,7 @@ get_image_yuy2(struct object_image *obj_image, uint8_t *image_data, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Both dest VA image and source surface have YUYV format */ dst = image_data + obj_image->image.offsets[0]; @@ -3353,121 +3895,64 @@ get_image_yuy2(struct object_image *obj_image, uint8_t *image_data, 
drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } static VAStatus i965_sw_getimage(VADriverContextP ctx, - VASurfaceID surface, - int x, /* coordinates of the upper left source pixel */ - int y, - unsigned int width, /* width and height of the region */ - unsigned int height, - VAImageID image) + struct object_surface *obj_surface, struct object_image *obj_image, + const VARectangle *rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_driver_data * const i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; - - struct object_surface *obj_surface = SURFACE(surface); - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - - struct object_image *obj_image = IMAGE(image); - if (!obj_image) - return VA_STATUS_ERROR_INVALID_IMAGE; - - if (x < 0 || y < 0) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (x + width > obj_surface->orig_width || - y + height > obj_surface->orig_height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (x + width > obj_image->image.width || - y + height > obj_image->image.height) - return VA_STATUS_ERROR_INVALID_PARAMETER; + void *image_data = NULL; + VAStatus va_status; if (obj_surface->fourcc != obj_image->image.format.fourcc) return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT; - VAStatus va_status; - void *image_data = NULL; - va_status = i965_MapBuffer(ctx, obj_image->image.buf, &image_data); if (va_status != VA_STATUS_SUCCESS) return va_status; - VARectangle rect; - rect.x = x; - rect.y = y; - rect.width = width; - rect.height = height; - switch (obj_image->image.format.fourcc) { - case VA_FOURCC('Y','V','1','2'): - case VA_FOURCC('I','4','2','0'): + case VA_FOURCC_YV12: + case VA_FOURCC_I420: /* I420 is native format for MPEG-2 decoded surfaces */ if (render_state->interleaved_uv) goto operation_failed; - get_image_i420(obj_image, image_data, obj_surface, &rect); + get_image_i420(obj_image, image_data, obj_surface, 
rect); break; - case VA_FOURCC('N','V','1','2'): + case VA_FOURCC_NV12: /* NV12 is native format for H.264 decoded surfaces */ if (!render_state->interleaved_uv) goto operation_failed; - get_image_nv12(obj_image, image_data, obj_surface, &rect); + get_image_nv12(obj_image, image_data, obj_surface, rect); break; - case VA_FOURCC('Y','U','Y','2'): + case VA_FOURCC_YUY2: /* YUY2 is the format supported by overlay plane */ - get_image_yuy2(obj_image, image_data, obj_surface, &rect); + get_image_yuy2(obj_image, image_data, obj_surface, rect); break; default: operation_failed: va_status = VA_STATUS_ERROR_OPERATION_FAILED; break; } + if (va_status != VA_STATUS_SUCCESS) + return va_status; - i965_UnmapBuffer(ctx, obj_image->image.buf); + va_status = i965_UnmapBuffer(ctx, obj_image->image.buf); return va_status; } static VAStatus i965_hw_getimage(VADriverContextP ctx, - VASurfaceID surface, - int x, /* coordinates of the upper left source pixel */ - int y, - unsigned int width, /* width and height of the region */ - unsigned int height, - VAImageID image) + struct object_surface *obj_surface, struct object_image *obj_image, + const VARectangle *rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_surface src_surface; struct i965_surface dst_surface; - VAStatus va_status; - VARectangle rect; - struct object_surface *obj_surface = SURFACE(surface); - struct object_image *obj_image = IMAGE(image); - - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - - if (!obj_image) - return VA_STATUS_ERROR_INVALID_IMAGE; - - if (x < 0 || y < 0) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (x + width > obj_surface->orig_width || - y + height > obj_surface->orig_height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (x + width > obj_image->image.width || - y + height > obj_image->image.height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - - if (!obj_surface->bo) - return VA_STATUS_SUCCESS; - assert(obj_image->bo); // image bo is always created, see 
i965_CreateImage() - - rect.x = x; - rect.y = y; - rect.width = width; - rect.height = height; src_surface.base = (struct object_base *)obj_surface; src_surface.type = I965_SURFACE_TYPE_SURFACE; @@ -3477,14 +3962,7 @@ i965_hw_getimage(VADriverContextP ctx, dst_surface.type = I965_SURFACE_TYPE_IMAGE; dst_surface.flags = I965_SURFACE_FLAG_FRAME; - va_status = i965_image_processing(ctx, - &src_surface, - &rect, - &dst_surface, - &rect); - - - return va_status; + return i965_image_processing(ctx, &src_surface, rect, &dst_surface, rect); } VAStatus @@ -3497,25 +3975,46 @@ i965_GetImage(VADriverContextP ctx, VAImageID image) { struct i965_driver_data * const i965 = i965_driver_data(ctx); + struct object_surface * const obj_surface = SURFACE(surface); + struct object_image * const obj_image = IMAGE(image); + VARectangle rect; VAStatus va_status; + if (!obj_surface) + return VA_STATUS_ERROR_INVALID_SURFACE; + if (!obj_surface->bo) /* don't get anything, keep previous data */ + return VA_STATUS_SUCCESS; + if (is_surface_busy(i965, obj_surface)) + return VA_STATUS_ERROR_SURFACE_BUSY; + + if (!obj_image || !obj_image->bo) + return VA_STATUS_ERROR_INVALID_IMAGE; + if (is_image_busy(i965, obj_image)) + return VA_STATUS_ERROR_SURFACE_BUSY; + + if (x < 0 || y < 0) + return VA_STATUS_ERROR_INVALID_PARAMETER; + if (x + width > obj_surface->orig_width || + y + height > obj_surface->orig_height) + return VA_STATUS_ERROR_INVALID_PARAMETER; + if (x + width > obj_image->image.width || + y + height > obj_image->image.height) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + rect.x = x; + rect.y = y; + rect.width = width; + rect.height = height; + if (HAS_ACCELERATED_GETIMAGE(i965)) - va_status = i965_hw_getimage(ctx, - surface, - x, y, - width, height, - image); + va_status = i965_hw_getimage(ctx, obj_surface, obj_image, &rect); else - va_status = i965_sw_getimage(ctx, - surface, - x, y, - width, height, - image); + va_status = i965_sw_getimage(ctx, obj_surface, obj_image, &rect); return 
va_status; } -static void +static VAStatus put_image_i420(struct object_surface *obj_surface, const VARectangle *dst_rect, struct object_image *obj_image, uint8_t *image_data, @@ -3526,13 +4025,13 @@ put_image_i420(struct object_surface *obj_surface, const int U = obj_image->image.format.fourcc == obj_surface->fourcc ? 1 : 2; const int V = obj_image->image.format.fourcc == obj_surface->fourcc ? 2 : 1; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; - if (!obj_surface->bo) - return; + ASSERT_RET(obj_surface->bo, VA_STATUS_ERROR_INVALID_SURFACE); - assert(obj_surface->fourcc); - assert(dst_rect->width == src_rect->width); - assert(dst_rect->height == src_rect->height); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(dst_rect->width == src_rect->width, VA_STATUS_ERROR_UNIMPLEMENTED); + ASSERT_RET(dst_rect->height == src_rect->height, VA_STATUS_ERROR_UNIMPLEMENTED); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); if (tiling != I915_TILING_NONE) @@ -3541,7 +4040,7 @@ put_image_i420(struct object_surface *obj_surface, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Dest VA image has either I420 or YV12 format. 
Source VA surface alway has I420 format */ @@ -3577,9 +4076,11 @@ put_image_i420(struct object_surface *obj_surface, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } -static void +static VAStatus put_image_nv12(struct object_surface *obj_surface, const VARectangle *dst_rect, struct object_image *obj_image, uint8_t *image_data, @@ -3587,13 +4088,14 @@ put_image_nv12(struct object_surface *obj_surface, { uint8_t *dst[2], *src[2]; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; if (!obj_surface->bo) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; - assert(obj_surface->fourcc); - assert(dst_rect->width == src_rect->width); - assert(dst_rect->height == src_rect->height); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(dst_rect->width == src_rect->width, VA_STATUS_ERROR_UNIMPLEMENTED); + ASSERT_RET(dst_rect->height == src_rect->height, VA_STATUS_ERROR_UNIMPLEMENTED); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); if (tiling != I915_TILING_NONE) @@ -3602,7 +4104,7 @@ put_image_nv12(struct object_surface *obj_surface, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Both dest VA image and source surface have NV12 format */ dst[0] = (uint8_t *)obj_surface->bo->virtual; @@ -3628,9 +4130,11 @@ put_image_nv12(struct object_surface *obj_surface, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); + + return va_status; } -static void +static VAStatus put_image_yuy2(struct object_surface *obj_surface, const VARectangle *dst_rect, struct object_image *obj_image, uint8_t *image_data, @@ -3638,13 +4142,12 @@ put_image_yuy2(struct object_surface *obj_surface, { uint8_t *dst, *src; unsigned int tiling, swizzle; + VAStatus va_status = VA_STATUS_SUCCESS; - if (!obj_surface->bo) - return; - - assert(obj_surface->fourcc); - assert(dst_rect->width == src_rect->width); - 
assert(dst_rect->height == src_rect->height); + ASSERT_RET(obj_surface->bo, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); + ASSERT_RET(dst_rect->width == src_rect->width, VA_STATUS_ERROR_UNIMPLEMENTED); + ASSERT_RET(dst_rect->height == src_rect->height, VA_STATUS_ERROR_UNIMPLEMENTED); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); if (tiling != I915_TILING_NONE) @@ -3653,7 +4156,7 @@ put_image_yuy2(struct object_surface *obj_surface, dri_bo_map(obj_surface->bo, 0); if (!obj_surface->bo->virtual) - return; + return VA_STATUS_ERROR_INVALID_SURFACE; /* Both dest VA image and source surface have YUY2 format */ dst = (uint8_t *)obj_surface->bo->virtual; @@ -3670,45 +4173,21 @@ put_image_yuy2(struct object_surface *obj_surface, drm_intel_gem_bo_unmap_gtt(obj_surface->bo); else dri_bo_unmap(obj_surface->bo); -} + return va_status; +} static VAStatus i965_sw_putimage(VADriverContextP ctx, - VASurfaceID surface, - VAImageID image, - int src_x, - int src_y, - unsigned int src_width, - unsigned int src_height, - int dest_x, - int dest_y, - unsigned int dest_width, - unsigned int dest_height) + struct object_surface *obj_surface, struct object_image *obj_image, + const VARectangle *src_rect, const VARectangle *dst_rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct object_surface *obj_surface = SURFACE(surface); - - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - - struct object_image *obj_image = IMAGE(image); - if (!obj_image) - return VA_STATUS_ERROR_INVALID_IMAGE; - - if (src_x < 0 || src_y < 0) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (src_x + src_width > obj_image->image.width || - src_y + src_height > obj_image->image.height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (dest_x < 0 || dest_y < 0) - return VA_STATUS_ERROR_INVALID_PARAMETER; - if (dest_x + dest_width > obj_surface->orig_width || - dest_y + dest_height > obj_surface->orig_height) - return 
VA_STATUS_ERROR_INVALID_PARAMETER; + VAStatus va_status = VA_STATUS_SUCCESS; + void *image_data = NULL; /* XXX: don't allow scaling */ - if (src_width != dest_width || src_height != dest_height) + if (src_rect->width != dst_rect->width || + src_rect->height != dst_rect->height) return VA_STATUS_ERROR_INVALID_PARAMETER; if (obj_surface->fourcc) { @@ -3719,7 +4198,7 @@ i965_sw_putimage(VADriverContextP ctx, else { /* VA is surface not used for decoding, use same VA image format */ - i965_check_alloc_surface_bo( + va_status = i965_check_alloc_surface_bo( ctx, obj_surface, 0, /* XXX: don't use tiled surface */ @@ -3727,80 +4206,42 @@ i965_sw_putimage(VADriverContextP ctx, get_sampling_from_fourcc (obj_image->image.format.fourcc)); } - VAStatus va_status; - void *image_data = NULL; + if (va_status != VA_STATUS_SUCCESS) + return va_status; va_status = i965_MapBuffer(ctx, obj_image->image.buf, &image_data); if (va_status != VA_STATUS_SUCCESS) return va_status; - - VARectangle src_rect, dest_rect; - src_rect.x = src_x; - src_rect.y = src_y; - src_rect.width = src_width; - src_rect.height = src_height; - dest_rect.x = dest_x; - dest_rect.y = dest_y; - dest_rect.width = dest_width; - dest_rect.height = dest_height; switch (obj_image->image.format.fourcc) { - case VA_FOURCC('Y','V','1','2'): - case VA_FOURCC('I','4','2','0'): - put_image_i420(obj_surface, &dest_rect, obj_image, image_data, &src_rect); + case VA_FOURCC_YV12: + case VA_FOURCC_I420: + va_status = put_image_i420(obj_surface, dst_rect, obj_image, image_data, src_rect); break; - case VA_FOURCC('N','V','1','2'): - put_image_nv12(obj_surface, &dest_rect, obj_image, image_data, &src_rect); + case VA_FOURCC_NV12: + va_status = put_image_nv12(obj_surface, dst_rect, obj_image, image_data, src_rect); break; - case VA_FOURCC('Y','U','Y','2'): - put_image_yuy2(obj_surface, &dest_rect, obj_image, image_data, &src_rect); + case VA_FOURCC_YUY2: + va_status = put_image_yuy2(obj_surface, dst_rect, obj_image, image_data, 
src_rect); break; default: va_status = VA_STATUS_ERROR_OPERATION_FAILED; break; } + if (va_status != VA_STATUS_SUCCESS) + return va_status; - i965_UnmapBuffer(ctx, obj_image->image.buf); + va_status = i965_UnmapBuffer(ctx, obj_image->image.buf); return va_status; } static VAStatus i965_hw_putimage(VADriverContextP ctx, - VASurfaceID surface, - VAImageID image, - int src_x, - int src_y, - unsigned int src_width, - unsigned int src_height, - int dest_x, - int dest_y, - unsigned int dest_width, - unsigned int dest_height) + struct object_surface *obj_surface, struct object_image *obj_image, + const VARectangle *src_rect, const VARectangle *dst_rect) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct object_surface *obj_surface = SURFACE(surface); - struct object_image *obj_image = IMAGE(image); struct i965_surface src_surface, dst_surface; VAStatus va_status = VA_STATUS_SUCCESS; - VARectangle src_rect, dst_rect; - - if (!obj_surface) - return VA_STATUS_ERROR_INVALID_SURFACE; - - if (!obj_image || !obj_image->bo) - return VA_STATUS_ERROR_INVALID_IMAGE; - - if (src_x < 0 || - src_y < 0 || - src_x + src_width > obj_image->image.width || - src_y + src_height > obj_image->image.height) - return VA_STATUS_ERROR_INVALID_PARAMETER; - - if (dest_x < 0 || - dest_y < 0 || - dest_x + dest_width > obj_surface->orig_width || - dest_y + dest_height > obj_surface->orig_height) - return VA_STATUS_ERROR_INVALID_PARAMETER; if (!obj_surface->bo) { unsigned int tiling, swizzle; @@ -3814,29 +4255,21 @@ i965_hw_putimage(VADriverContextP ctx, surface_sampling); } - assert(obj_surface->fourcc); + ASSERT_RET(obj_surface->fourcc, VA_STATUS_ERROR_INVALID_SURFACE); src_surface.base = (struct object_base *)obj_image; src_surface.type = I965_SURFACE_TYPE_IMAGE; src_surface.flags = I965_SURFACE_FLAG_FRAME; - src_rect.x = src_x; - src_rect.y = src_y; - src_rect.width = src_width; - src_rect.height = src_height; dst_surface.base = (struct object_base *)obj_surface; dst_surface.type = 
I965_SURFACE_TYPE_SURFACE; dst_surface.flags = I965_SURFACE_FLAG_FRAME; - dst_rect.x = dest_x; - dst_rect.y = dest_y; - dst_rect.width = dest_width; - dst_rect.height = dest_height; va_status = i965_image_processing(ctx, &src_surface, - &src_rect, + src_rect, &dst_surface, - &dst_rect); + dst_rect); return va_status; } @@ -3854,33 +4287,50 @@ i965_PutImage(VADriverContextP ctx, unsigned int dest_width, unsigned int dest_height) { - struct i965_driver_data *i965 = i965_driver_data(ctx); - VAStatus va_status = VA_STATUS_SUCCESS; + struct i965_driver_data * const i965 = i965_driver_data(ctx); + struct object_surface * const obj_surface = SURFACE(surface); + struct object_image * const obj_image = IMAGE(image); + VARectangle src_rect, dst_rect; + VAStatus va_status; + + if (!obj_surface) + return VA_STATUS_ERROR_INVALID_SURFACE; + if (is_surface_busy(i965, obj_surface)) + return VA_STATUS_ERROR_SURFACE_BUSY; + + if (!obj_image || !obj_image->bo) + return VA_STATUS_ERROR_INVALID_IMAGE; + if (is_image_busy(i965, obj_image)) + return VA_STATUS_ERROR_SURFACE_BUSY; + + if (src_x < 0 || + src_y < 0 || + src_x + src_width > obj_image->image.width || + src_y + src_height > obj_image->image.height) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + src_rect.x = src_x; + src_rect.y = src_y; + src_rect.width = src_width; + src_rect.height = src_height; + + if (dest_x < 0 || + dest_y < 0 || + dest_x + dest_width > obj_surface->orig_width || + dest_y + dest_height > obj_surface->orig_height) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + dst_rect.x = dest_x; + dst_rect.y = dest_y; + dst_rect.width = dest_width; + dst_rect.height = dest_height; if (HAS_ACCELERATED_PUTIMAGE(i965)) - va_status = i965_hw_putimage(ctx, - surface, - image, - src_x, - src_y, - src_width, - src_height, - dest_x, - dest_y, - dest_width, - dest_height); + va_status = i965_hw_putimage(ctx, obj_surface, obj_image, + &src_rect, &dst_rect); else - va_status = i965_sw_putimage(ctx, - surface, - image, - src_x, - 
src_y, - src_width, - src_height, - dest_x, - dest_y, - dest_width, - dest_height); + va_status = i965_sw_putimage(ctx, obj_surface, obj_image, + &src_rect, &dst_rect); return va_status; } @@ -3937,10 +4387,7 @@ i965_BufferInfo( i965 = i965_driver_data(ctx); obj_buffer = BUFFER(buf_id); - assert(obj_buffer); - - if (!obj_buffer) - return VA_STATUS_ERROR_INVALID_BUFFER; + ASSERT_RET(obj_buffer, VA_STATUS_ERROR_INVALID_BUFFER); *type = obj_buffer->type; *size = obj_buffer->size_element; @@ -3969,15 +4416,15 @@ i965_LockSurface( struct object_surface *obj_surface = NULL; VAImage tmpImage; - assert(fourcc); - assert(luma_stride); - assert(chroma_u_stride); - assert(chroma_v_stride); - assert(luma_offset); - assert(chroma_u_offset); - assert(chroma_v_offset); - assert(buffer_name); - assert(buffer); + ASSERT_RET(fourcc, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(luma_stride, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(chroma_u_stride, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(chroma_v_stride, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(luma_offset, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(chroma_u_offset, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(chroma_v_offset, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(buffer_name, VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET(buffer, VA_STATUS_ERROR_INVALID_PARAMETER); tmpImage.image_id = VA_INVALID_ID; @@ -4112,41 +4559,42 @@ i965_GetSurfaceAttributes( attrib_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; if (attrib_list[i].value.value.i == 0) { - if (IS_G4X(i965->intel.device_id)) { + if (IS_G4X(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { - attrib_list[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attrib_list[i].value.value.i = VA_FOURCC_I420; } else { assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } - } else if (IS_IRONLAKE(i965->intel.device_id)) { + } 
else if (IS_IRONLAKE(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { - attrib_list[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); - } else if (obj_config->profile == VAProfileH264Baseline || + attrib_list[i].value.value.i = VA_FOURCC_I420; + } else if (obj_config->profile == VAProfileH264ConstrainedBaseline || obj_config->profile == VAProfileH264Main || obj_config->profile == VAProfileH264High) { - attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attrib_list[i].value.value.i = VA_FOURCC_NV12; } else if (obj_config->profile == VAProfileNone) { - attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attrib_list[i].value.value.i = VA_FOURCC_NV12; } else { assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } - } else if (IS_GEN6(i965->intel.device_id)) { - attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); - } else if (IS_GEN7(i965->intel.device_id)) { + } else if (IS_GEN6(i965->intel.device_info)) { + attrib_list[i].value.value.i = VA_FOURCC_NV12; + } else if (IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { if (obj_config->profile == VAProfileJPEGBaseline) attrib_list[i].value.value.i = 0; /* internal format */ else - attrib_list[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attrib_list[i].value.value.i = VA_FOURCC_NV12; } } else { - if (IS_G4X(i965->intel.device_id)) { + if (IS_G4X(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { - if (attrib_list[i].value.value.i != VA_FOURCC('I', '4', '2', '0')) { + if (attrib_list[i].value.value.i != VA_FOURCC_I420) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } @@ -4154,30 +4602,30 @@ i965_GetSurfaceAttributes( assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } - } else if (IS_IRONLAKE(i965->intel.device_id)) { + } else if 
(IS_IRONLAKE(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { - if (attrib_list[i].value.value.i != VA_FOURCC('I', '4', '2', '0')) { + if (attrib_list[i].value.value.i != VA_FOURCC_I420) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } - } else if (obj_config->profile == VAProfileH264Baseline || + } else if (obj_config->profile == VAProfileH264ConstrainedBaseline || obj_config->profile == VAProfileH264Main || obj_config->profile == VAProfileH264High) { - if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) { + if (attrib_list[i].value.value.i != VA_FOURCC_NV12) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } } else if (obj_config->profile == VAProfileNone) { switch (attrib_list[i].value.value.i) { - case VA_FOURCC('N', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): + case VA_FOURCC_NV12: + case VA_FOURCC_I420: + case VA_FOURCC_YV12: + case VA_FOURCC_YUY2: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: break; default: attrib_list[i].value.value.i = 0; @@ -4188,18 +4636,18 @@ i965_GetSurfaceAttributes( assert(0); attrib_list[i].flags = VA_SURFACE_ATTRIB_NOT_SUPPORTED; } - } else if (IS_GEN6(i965->intel.device_id)) { + } else if (IS_GEN6(i965->intel.device_info)) { if (obj_config->entrypoint == VAEntrypointEncSlice || obj_config->entrypoint == VAEntrypointVideoProc) { switch (attrib_list[i].value.value.i) { - case VA_FOURCC('N', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 
'R', 'X'): - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): + case VA_FOURCC_NV12: + case VA_FOURCC_I420: + case VA_FOURCC_YV12: + case VA_FOURCC_YUY2: + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: break; default: attrib_list[i].value.value.i = 0; @@ -4207,18 +4655,19 @@ i965_GetSurfaceAttributes( break; } } else { - if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) { + if (attrib_list[i].value.value.i != VA_FOURCC_NV12) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } } - } else if (IS_GEN7(i965->intel.device_id)) { + } else if (IS_GEN7(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) { if (obj_config->entrypoint == VAEntrypointEncSlice || obj_config->entrypoint == VAEntrypointVideoProc) { switch (attrib_list[i].value.value.i) { - case VA_FOURCC('N', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('Y', 'V', '1', '2'): + case VA_FOURCC_NV12: + case VA_FOURCC_I420: + case VA_FOURCC_YV12: break; default: attrib_list[i].value.value.i = 0; @@ -4230,7 +4679,7 @@ i965_GetSurfaceAttributes( attrib_list[i].value.value.i = 0; /* JPEG decoding always uses an internal format */ attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } else { - if (attrib_list[i].value.value.i != VA_FOURCC('N', 'V', '1', '2')) { + if (attrib_list[i].value.value.i != VA_FOURCC_NV12) { attrib_list[i].value.value.i = 0; attrib_list[i].flags &= ~VA_SURFACE_ATTRIB_SETTABLE; } @@ -4292,47 +4741,50 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs = malloc(I965_MAX_SURFACE_ATTRIBUTES *sizeof(*attribs)); - if (IS_G4X(i965->intel.device_id)) { + if (attribs == NULL) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + + if (IS_G4X(i965->intel.device_info)) { if (obj_config->profile == VAProfileMPEG2Simple || obj_config->profile == VAProfileMPEG2Main) { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = 
VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; } - } else if (IS_IRONLAKE(i965->intel.device_id)) { + } else if (IS_IRONLAKE(i965->intel.device_info)) { switch (obj_config->profile) { case VAProfileMPEG2Simple: case VAProfileMPEG2Main: attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; case VAProfileNone: attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; break; @@ -4340,102 +4792,102 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, default: break; } - } else if (IS_GEN6(i965->intel.device_id)) { + } else if (IS_GEN6(i965->intel.device_info)) { if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */ attribs[i].type = VASurfaceAttribPixelFormat; 
attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; } else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */ obj_config->entrypoint == VAEntrypointVideoProc) { /* vpp */ attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_YV12; i++; if (obj_config->entrypoint == VAEntrypointVideoProc) { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'U', 'Y', '2'); + attribs[i].value.value.i = VA_FOURCC_YUY2; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'A'); + attribs[i].value.value.i = VA_FOURCC_RGBA; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | 
VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'X'); + attribs[i].value.value.i = VA_FOURCC_RGBX; i++; } } - } else if (IS_GEN7(i965->intel.device_id)) { + } else if (IS_GEN7(i965->intel.device_info)) { if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */ if (obj_config->profile == VAProfileJPEGBaseline) { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3'); + attribs[i].value.value.i = VA_FOURCC_IMC3; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '1'); + attribs[i].value.value.i = VA_FOURCC_IMC1; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', '8', '0', '0'); + attribs[i].value.value.i = VA_FOURCC_Y800; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '1', '1', 'P'); + attribs[i].value.value.i = VA_FOURCC_411P; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '2', '2', 'H'); + attribs[i].value.value.i = VA_FOURCC_422H; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '2', '2', 'V'); 
+ attribs[i].value.value.i = VA_FOURCC_422V; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('4', '4', '4', 'P'); + attribs[i].value.value.i = VA_FOURCC_444P; i++; } else { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; i++; } } else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */ @@ -4443,44 +4895,178 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('N', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_NV12; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_I420; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_YV12; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_IMC3; + i++; + + if (obj_config->entrypoint == VAEntrypointVideoProc) { + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + 
attribs[i].value.value.i = VA_FOURCC_YUY2; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_RGBA; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_RGBX; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_BGRA; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_BGRX; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_YV16; + i++; + } + } + } else if (IS_GEN8(i965->intel.device_info)) { + if (obj_config->entrypoint == VAEntrypointVLD) { /* decode */ + if (obj_config->profile == VAProfileJPEGBaseline) { + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_IMC3; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_IMC1; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = 
VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_Y800; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_411P; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_422H; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_422V; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_444P; + i++; + } else { + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_NV12; + i++; + } + } else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */ + obj_config->entrypoint == VAEntrypointVideoProc) { /* vpp */ + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_NV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', '4', '2', '0'); + attribs[i].value.value.i = VA_FOURCC_I420; i++; attribs[i].type = VASurfaceAttribPixelFormat; 
attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'V', '1', '2'); + attribs[i].value.value.i = VA_FOURCC_YV12; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('I', 'M', 'C', '3'); + attribs[i].value.value.i = VA_FOURCC_IMC3; i++; if (obj_config->entrypoint == VAEntrypointVideoProc) { attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('Y', 'U', 'Y', '2'); + attribs[i].value.value.i = VA_FOURCC_YUY2; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_RGBA; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_RGBX; + i++; + + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_BGRA; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'A'); + attribs[i].value.value.i = VA_FOURCC_BGRX; i++; attribs[i].type = VASurfaceAttribPixelFormat; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | 
VA_SURFACE_ATTRIB_SETTABLE; - attribs[i].value.value.i = VA_FOURCC('R', 'G', 'B', 'X'); + attribs[i].value.value.i = VA_FOURCC_YV16; i++; } } @@ -4512,6 +5098,159 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, return vaStatus; } + +/* Acquires buffer handle for external API usage (internal implementation) */ +static VAStatus +i965_acquire_buffer_handle(struct object_buffer *obj_buffer, + uint32_t mem_type, VABufferInfo *out_buf_info) +{ + struct buffer_store *buffer_store; + + buffer_store = obj_buffer->buffer_store; + if (!buffer_store || !buffer_store->bo) + return VA_STATUS_ERROR_INVALID_BUFFER; + + /* Synchronization point */ + drm_intel_bo_wait_rendering(buffer_store->bo); + + if (obj_buffer->export_refcount > 0) { + if (obj_buffer->export_state.mem_type != mem_type) + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + else { + VABufferInfo * const buf_info = &obj_buffer->export_state; + + switch (mem_type) { + case VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM: { + uint32_t name; + if (drm_intel_bo_flink(buffer_store->bo, &name) != 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + buf_info->handle = name; + break; + } + case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: { + int fd; + if (drm_intel_bo_gem_export_to_prime(buffer_store->bo, &fd) != 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + buf_info->handle = (intptr_t)fd; + break; + } + } + + buf_info->type = obj_buffer->type; + buf_info->mem_type = mem_type; + buf_info->mem_size = + obj_buffer->num_elements * obj_buffer->size_element; + } + + obj_buffer->export_refcount++; + *out_buf_info = obj_buffer->export_state; + return VA_STATUS_SUCCESS; +} + +/* Releases buffer handle after usage (internal implementation) */ +static VAStatus +i965_release_buffer_handle(struct object_buffer *obj_buffer) +{ + if (obj_buffer->export_refcount == 0) + return VA_STATUS_ERROR_INVALID_BUFFER; + + if (--obj_buffer->export_refcount == 0) { + VABufferInfo * const buf_info = &obj_buffer->export_state; + + switch (buf_info->mem_type) { + case 
VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: { + close((intptr_t)buf_info->handle); + break; + } + } + buf_info->mem_type = 0; + } + return VA_STATUS_SUCCESS; +} + +/** Acquires buffer handle for external API usage; a zero buf_info->mem_type selects the first (preferred) entry of mem_types[] */ +static VAStatus +i965_AcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id, + VABufferInfo *buf_info) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + struct object_buffer * const obj_buffer = BUFFER(buf_id); + uint32_t i, mem_type; + + /* List of supported memory types, in preferred order */ + static const uint32_t mem_types[] = { + VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME, + VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM, + 0 + }; + + if (!obj_buffer) + return VA_STATUS_ERROR_INVALID_BUFFER; + /* XXX: only VA surface|image like buffers are supported for now */ + if (obj_buffer->type != VAImageBufferType) + return VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE; + + if (!buf_info) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + if (!buf_info->mem_type) + mem_type = mem_types[0]; + else { + mem_type = 0; + for (i = 0; mem_types[i] != 0; i++) { + if (buf_info->mem_type & mem_types[i]) { + mem_type = mem_types[i]; /* export exactly the matched supported type, never the caller's raw bit mask */ + break; + } + } + if (!mem_type) + return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE; + } + return i965_acquire_buffer_handle(obj_buffer, mem_type, buf_info); +} + +/** Releases buffer handle after usage from external API */ +static VAStatus +i965_ReleaseBufferHandle(VADriverContextP ctx, VABufferID buf_id) +{ + struct i965_driver_data * const i965 = i965_driver_data(ctx); + struct object_buffer * const obj_buffer = BUFFER(buf_id); + + if (!obj_buffer) + return VA_STATUS_ERROR_INVALID_BUFFER; + + return i965_release_buffer_handle(obj_buffer); +} + +static int +i965_os_has_ring_support(VADriverContextP ctx, + int ring) +{ + struct i965_driver_data *const i965 = i965_driver_data(ctx); + + switch (ring) { + case I965_RING_BSD: + return i965->intel.has_bsd; + + case I965_RING_BLT: + return i965->intel.has_blt; + + case I965_RING_VEBOX: + return
i965->intel.has_vebox; + + case I965_RING_NULL: + return 1; /* Always support */ + + default: + /* should never get here */ + assert(0); + break; + } + + return 0; +} + /* * Query video processing pipeline */ @@ -4523,18 +5262,24 @@ VAStatus i965_QueryVideoProcFilters( ) { struct i965_driver_data *const i965 = i965_driver_data(ctx); - unsigned int i = 0; + unsigned int i = 0, num = 0; if (!num_filters || !filters) return VA_STATUS_ERROR_INVALID_PARAMETER; - for (i = 0; i < *num_filters && i < i965->codec_info->num_filters; i++) - filters[i] = i965->codec_info->filters[i]; + for (i = 0; i < i965->codec_info->num_filters; i++) { + if (i965_os_has_ring_support(ctx, i965->codec_info->filters[i].ring)) { + if (num == *num_filters) { + *num_filters = i965->codec_info->num_filters; - *num_filters = i; + return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + } + + filters[num++] = i965->codec_info->filters[i].type; + } + } - if (i < i965->codec_info->num_filters) - return VA_STATUS_ERROR_MAX_NUM_EXCEEDED; + *num_filters = num; return VA_STATUS_SUCCESS; } @@ -4553,6 +5298,17 @@ VAStatus i965_QueryVideoProcFilterCaps( if (!filter_caps || !num_filter_caps) return VA_STATUS_ERROR_INVALID_PARAMETER; + for (i = 0; i < i965->codec_info->num_filters; i++) { + if (type == i965->codec_info->filters[i].type && + i965_os_has_ring_support(ctx, i965->codec_info->filters[i].ring)) + break; + } + + if (i == i965->codec_info->num_filters) + return VA_STATUS_ERROR_UNSUPPORTED_FILTER; + + i = 0; + switch (type) { case VAProcFilterNoiseReduction: case VAProcFilterSharpening: @@ -4592,7 +5348,13 @@ VAStatus i965_QueryVideoProcFilterCaps( i++; cap++; } - } + + if (i965->codec_info->has_di_motion_compensated) { + cap->type = VAProcDeinterlacingMotionCompensated; + i++; + cap++; + } + } break; @@ -4616,7 +5378,7 @@ VAStatus i965_QueryVideoProcFilterCaps( cap->type = VAProcColorBalanceSaturation; cap->range.min_value = 0.0; cap->range.max_value = 10.0; - cap->range.default_value = 0.0; + 
cap->range.default_value = 1.0; cap->range.step = 0.1; i++; cap++; @@ -4632,7 +5394,7 @@ VAStatus i965_QueryVideoProcFilterCaps( cap->type = VAProcColorBalanceContrast; cap->range.min_value = 0.0; cap->range.max_value = 10.0; - cap->range.default_value = 0.0; + cap->range.default_value = 1.0; cap->range.step = 0.1; i++; cap++; @@ -4694,33 +5456,33 @@ VAStatus i965_QueryVideoProcPipelineCaps( } else if (base->type == VAProcFilterDeinterlacing) { VAProcFilterParameterBufferDeinterlacing *deint = (VAProcFilterParameterBufferDeinterlacing *)base; - assert(deint->algorithm == VAProcDeinterlacingBob || - deint->algorithm == VAProcDeinterlacingMotionAdaptive); + ASSERT_RET(deint->algorithm == VAProcDeinterlacingBob || + deint->algorithm == VAProcDeinterlacingMotionAdaptive || + deint->algorithm == VAProcDeinterlacingMotionCompensated, + VA_STATUS_ERROR_INVALID_PARAMETER); - if (deint->algorithm == VAProcDeinterlacingMotionAdaptive) + if (deint->algorithm == VAProcDeinterlacingMotionAdaptive || + deint->algorithm == VAProcDeinterlacingMotionCompensated) /* no ';' here: only the motion-based algorithms add a forward reference, Bob must not */ pipeline_cap->num_forward_references++; + } else if (base->type == VAProcFilterSkinToneEnhancement) { + VAProcFilterParameterBuffer *stde = (VAProcFilterParameterBuffer *)base; + (void)stde; } } return VA_STATUS_SUCCESS; } +extern struct hw_codec_info *i965_get_codec_info(int devid); + static bool i965_driver_data_init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_HASWELL(i965->intel.device_id)) - i965->codec_info = &gen75_hw_codec_info; - else if (IS_G4X(i965->intel.device_id)) - i965->codec_info = &g4x_hw_codec_info; - else if (IS_IRONLAKE(i965->intel.device_id)) - i965->codec_info = &ironlake_hw_codec_info; - else if (IS_GEN6(i965->intel.device_id)) - i965->codec_info = &gen6_hw_codec_info; - else if (IS_GEN7(i965->intel.device_id)) - i965->codec_info = &gen7_hw_codec_info; - else + i965->codec_info = i965_get_codec_info(i965->intel.device_id); + + if (!i965->codec_info) return
false; if (object_heap_init(&i965->config_heap, @@ -4750,6 +5512,7 @@ i965_driver_data_init(VADriverContextP ctx) goto err_subpic_heap; i965->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0); + i965->pp_batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0); _i965InitMutex(&i965->render_mutex); _i965InitMutex(&i965->pp_mutex); @@ -4781,6 +5544,9 @@ i965_driver_data_terminate(VADriverContextP ctx) if (i965->batch) intel_batchbuffer_free(i965->batch); + if (i965->pp_batch) + intel_batchbuffer_free(i965->pp_batch); + i965_destroy_heap(&i965->subpic_heap, i965_destroy_subpic); i965_destroy_heap(&i965->image_heap, i965_destroy_image); i965_destroy_heap(&i965->buffer_heap, i965_destroy_buffer); @@ -4841,11 +5607,51 @@ struct { #endif }; +static bool +ensure_vendor_string(struct i965_driver_data *i965, const char *chipset) +{ + int ret, len; + + if (i965->va_vendor[0] != '\0') + return true; + + len = 0; + ret = snprintf(i965->va_vendor, sizeof(i965->va_vendor), + "%s %s driver for %s - %d.%d.%d", + INTEL_STR_DRIVER_VENDOR, INTEL_STR_DRIVER_NAME, chipset, + INTEL_DRIVER_MAJOR_VERSION, INTEL_DRIVER_MINOR_VERSION, + INTEL_DRIVER_MICRO_VERSION); + if (ret < 0 || ret >= sizeof(i965->va_vendor)) + goto error; + len = ret; + + if (INTEL_DRIVER_PRE_VERSION > 0) { + ret = snprintf(&i965->va_vendor[len], sizeof(i965->va_vendor) - len, + ".pre%d", INTEL_DRIVER_PRE_VERSION); + if (ret < 0 || ret >= sizeof(i965->va_vendor)) + goto error; + len += ret; + + ret = snprintf(&i965->va_vendor[len], sizeof(i965->va_vendor) - len, + " (%s)", INTEL_DRIVER_GIT_VERSION); + if (ret < 0 || ret >= sizeof(i965->va_vendor)) + goto error; + len += ret; + } + return true; + +error: + i965->va_vendor[0] = '\0'; + ASSERT_RET(ret > 0 && len < sizeof(i965->va_vendor), false); + return false; +} + static VAStatus i965_Init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); int i; + const char *chipset; for (i = 0; i < ARRAY_ELEMS(i965_sub_ops); i++) { 
if ((i965_sub_ops[i].display_type == 0 || @@ -4854,19 +5660,22 @@ i965_Init(VADriverContextP ctx) break; } + if (i965->codec_info->preinit_hw_codec) + i965->codec_info->preinit_hw_codec(ctx, i965->codec_info); + if (i == ARRAY_ELEMS(i965_sub_ops)) { - sprintf(i965->va_vendor, "%s %s driver - %d.%d.%d", - INTEL_STR_DRIVER_VENDOR, - INTEL_STR_DRIVER_NAME, - INTEL_DRIVER_MAJOR_VERSION, - INTEL_DRIVER_MINOR_VERSION, - INTEL_DRIVER_MICRO_VERSION); - - if (INTEL_DRIVER_PRE_VERSION > 0) { - const int len = strlen(i965->va_vendor); - sprintf(&i965->va_vendor[len], ".pre%d", INTEL_DRIVER_PRE_VERSION); + switch (i965->intel.device_id) { +#undef CHIPSET +#define CHIPSET(id, family, dev, str) case id: chipset = str; break; +#include "i965_pciids.h" + default: + chipset = "Unknown Intel Chipset"; + break; } + if (!ensure_vendor_string(i965, chipset)) + return VA_STATUS_ERROR_ALLOCATION_FAILED; + i965->current_context_id = VA_INVALID_ID; return VA_STATUS_SUCCESS; @@ -4896,6 +5705,9 @@ i965_Terminate(VADriverContextP ctx) i965_sub_ops[i - 1].display_type == (ctx->display_type & VA_DISPLAY_MAJOR_MASK)) { i965_sub_ops[i - 1].terminate(ctx); } + + free(i965); + ctx->pDriverData = NULL; } return VA_STATUS_SUCCESS; @@ -4925,7 +5737,6 @@ VA_DRIVER_INIT_FUNC( VADriverContextP ctx ) vtable->vaTerminate = i965_Terminate; vtable->vaQueryConfigEntrypoints = i965_QueryConfigEntrypoints; vtable->vaQueryConfigProfiles = i965_QueryConfigProfiles; - vtable->vaQueryConfigEntrypoints = i965_QueryConfigEntrypoints; vtable->vaQueryConfigAttributes = i965_QueryConfigAttributes; vtable->vaCreateConfig = i965_CreateConfig; vtable->vaDestroyConfig = i965_DestroyConfig; @@ -4970,6 +5781,10 @@ VA_DRIVER_INIT_FUNC( VADriverContextP ctx ) vtable->vaQuerySurfaceAttributes = i965_QuerySurfaceAttributes; vtable->vaCreateSurfaces2 = i965_CreateSurfaces2; + /* 0.36.0 */ + vtable->vaAcquireBufferHandle = i965_AcquireBufferHandle; + vtable->vaReleaseBufferHandle = i965_ReleaseBufferHandle; + 
vtable_vpp->vaQueryVideoProcFilters = i965_QueryVideoProcFilters; vtable_vpp->vaQueryVideoProcFilterCaps = i965_QueryVideoProcFilterCaps; vtable_vpp->vaQueryVideoProcPipelineCaps = i965_QueryVideoProcPipelineCaps; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index e694d67..629489f 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -40,8 +40,9 @@ #include "i965_mutext.h" #include "object_heap.h" #include "intel_driver.h" +#include "i965_fourcc.h" -#define I965_MAX_PROFILES 11 +#define I965_MAX_PROFILES 20 #define I965_MAX_ENTRYPOINTS 5 #define I965_MAX_CONFIG_ATTRIBUTES 10 #define I965_MAX_IMAGE_FORMATS 10 @@ -59,6 +60,16 @@ #define I965_SURFACE_FLAG_TOP_FIELD_FIRST 0x00000001 #define I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST 0x00000002 +#define DEFAULT_BRIGHTNESS 0 +#define DEFAULT_CONTRAST 50 +#define DEFAULT_HUE 0 +#define DEFAULT_SATURATION 50 + +#define ENCODER_QUALITY_RANGE 2 +#define ENCODER_DEFAULT_QUALITY 1 +#define ENCODER_HIGH_QUALITY ENCODER_DEFAULT_QUALITY +#define ENCODER_LOW_QUALITY 2 + struct i965_surface { struct object_base *base; @@ -73,6 +84,7 @@ struct i965_kernel const uint32_t (*bin)[4]; int size; dri_bo *bo; + unsigned int kernel_offset; }; struct buffer_store @@ -94,14 +106,20 @@ struct object_config #define NUM_SLICES 10 +struct codec_state_base { + uint32_t chroma_formats; +}; + struct decode_state { + struct codec_state_base base; struct buffer_store *pic_param; struct buffer_store **slice_params; struct buffer_store *iq_matrix; struct buffer_store *bit_plane; struct buffer_store *huffman_table; struct buffer_store **slice_datas; + struct buffer_store *probability_data; VASurfaceID current_render_target; int max_slice_params; int max_slice_datas; @@ -112,8 +130,12 @@ struct decode_state struct object_surface *reference_objects[16]; /* Up to 2 reference surfaces are valid for MPEG-2,*/ }; +#define SLICE_PACKED_DATA_INDEX_TYPE 0x80000000 +#define SLICE_PACKED_DATA_INDEX_MASK 0x00FFFFFF + struct encode_state { + struct 
codec_state_base base; struct buffer_store *seq_param; struct buffer_store *pic_param; struct buffer_store *pic_control; @@ -131,9 +153,41 @@ struct encode_state struct buffer_store **slice_params_ext; int max_slice_params_ext; int num_slice_params_ext; + + /* Check the user-configurable packed_header attribute. + * Currently it is mainly used to check whether the packed slice_header data + * is provided by user or the driver. + * TBD: It will check for the packed SPS/PPS/MISC/RAWDATA and so on. + */ + unsigned int packed_header_flag; + /* For the packed data that needs to be inserted into video clip */ + /* currently it is mainly to track packed raw data and packed slice_header data. */ + struct buffer_store **packed_header_params_ext; + int max_packed_header_params_ext; + int num_packed_header_params_ext; + struct buffer_store **packed_header_data_ext; + int max_packed_header_data_ext; + int num_packed_header_data_ext; + + /* the index of current slice */ + int slice_index; + /* the array is determined by max_slice_params_ext */ + int max_slice_num; + /* This is to store the first index of packed data for one slice */ + int *slice_rawdata_index; + /* This is to store the number of packed data for one slice. + * Both packed rawdata and slice_header data are tracked by this + * this variable. That is to say: When one packed slice_header is parsed, + * this variable will also be increased. 
+ */ + int *slice_rawdata_count; + + /* This is to store the index of packed slice header for one slice */ + int *slice_header_index; + int last_packed_header_type; - struct buffer_store *misc_param[8]; + struct buffer_store *misc_param[16]; VASurfaceID current_render_target; struct object_surface *input_yuv_object; @@ -144,6 +198,7 @@ struct encode_state struct proc_state { + struct codec_state_base base; struct buffer_store *pipeline_param; VASurfaceID current_render_target; @@ -155,6 +210,7 @@ struct proc_state union codec_state { + struct codec_state_base base; struct decode_state decode; struct encode_state encode; struct proc_state proc; @@ -186,12 +242,8 @@ struct object_context }; #define SURFACE_REFERENCED (1 << 0) -#define SURFACE_DISPLAYED (1 << 1) #define SURFACE_DERIVED (1 << 2) -#define SURFACE_REF_DIS_MASK ((SURFACE_REFERENCED) | \ - (SURFACE_DISPLAYED)) #define SURFACE_ALL_MASK ((SURFACE_REFERENCED) | \ - (SURFACE_DISPLAYED) | \ (SURFACE_DERIVED)) struct object_surface @@ -202,15 +254,16 @@ struct object_surface struct object_subpic *obj_subpic[I965_MAX_SUBPIC_SUM]; unsigned int subpic_render_idx; - int width; - int height; + int width; /* the pitch of plane 0 in bytes in horizontal direction */ + int height; /* the pitch of plane 0 in bytes in vertical direction */ int size; - int orig_width; - int orig_height; + int orig_width; /* the width of plane 0 in pixels */ + int orig_height; /* the height of plane 0 in pixels */ int flags; unsigned int fourcc; dri_bo *bo; VAImageID locked_image_id; + VAImageID derived_image_id; void (*free_private_data)(void **data); void *private_data; unsigned int subsampling; @@ -221,6 +274,10 @@ struct object_surface int cb_cr_width; int cb_cr_height; int cb_cr_pitch; + /* user specified attributes see: VASurfaceAttribExternalBuffers/VA_SURFACE_ATTRIB_MEM_TYPE_VA */ + uint32_t user_disable_tiling : 1; + uint32_t user_h_stride_set : 1; + uint32_t user_v_stride_set : 1; }; struct object_buffer @@ -231,6 +288,10 @@ struct 
object_buffer int num_elements; int size_element; VABufferType type; + + /* Export state */ + unsigned int export_refcount; + VABufferInfo export_state; }; struct object_image @@ -258,13 +319,34 @@ struct object_subpic unsigned int flags; }; +#define I965_RING_NULL 0 +#define I965_RING_BSD 1 +#define I965_RING_BLT 2 +#define I965_RING_VEBOX 3 + +struct i965_filter +{ + VAProcFilterType type; + int ring; +}; + struct hw_codec_info { struct hw_context *(*dec_hw_context_init)(VADriverContextP, struct object_config *); struct hw_context *(*enc_hw_context_init)(VADriverContextP, struct object_config *); struct hw_context *(*proc_hw_context_init)(VADriverContextP, struct object_config *); + bool (*render_init)(VADriverContextP); + void (*post_processing_context_init)(VADriverContextP, void *, struct intel_batchbuffer *); + void (*preinit_hw_codec)(VADriverContextP, struct hw_codec_info *); + int max_width; int max_height; + int min_linear_wpitch; + int min_linear_hpitch; + + unsigned int h264_mvc_dec_profiles; + unsigned int h264_dec_chroma_formats; + unsigned int jpeg_dec_chroma_formats; unsigned int has_mpeg2_decoding:1; unsigned int has_mpeg2_encoding:1; @@ -280,9 +362,12 @@ struct hw_codec_info unsigned int has_tiled_surface:1; unsigned int has_di_motion_adptive:1; unsigned int has_di_motion_compensated:1; + unsigned int has_vp8_decoding:1; + unsigned int has_vp8_encoding:1; + unsigned int has_h264_mvc_encoding:1; unsigned int num_filters; - VAProcFilterType filters[VAProcFilterCount]; + struct i965_filter filters[VAProcFilterCount]; }; @@ -302,6 +387,7 @@ struct i965_driver_data _I965Mutex render_mutex; _I965Mutex pp_mutex; struct intel_batchbuffer *batch; + struct intel_batchbuffer *pp_batch; struct i965_render_state render_state; void *pp_context; char va_vendor[256]; @@ -309,6 +395,10 @@ struct i965_driver_data VADisplayAttribute *display_attributes; unsigned int num_display_attributes; VADisplayAttribute *rotation_attrib; + VADisplayAttribute *brightness_attrib; 
+ VADisplayAttribute *contrast_attrib; + VADisplayAttribute *hue_attrib; + VADisplayAttribute *saturation_attrib; VAContextID current_context_id; /* VA/DRI (X11) specific data */ @@ -344,7 +434,7 @@ i965_driver_data(VADriverContextP ctx) return (struct i965_driver_data *)(ctx->pDriverData); } -void +VAStatus i965_check_alloc_surface_bo(VADriverContextP ctx, struct object_surface *obj_surface, int tiled, @@ -355,8 +445,9 @@ int va_enc_packed_type_to_idx(int packed_type); /* reserve 2 byte for internal using */ -#define CODED_H264 0 -#define CODED_MPEG2 1 +#define CODEC_H264 0 +#define CODEC_MPEG2 1 +#define CODEC_H264_MVC 2 #define H264_DELIMITER0 0x00 #define H264_DELIMITER1 0x00 @@ -385,8 +476,22 @@ extern VAStatus i965_MapBuffer(VADriverContextP ctx, extern VAStatus i965_UnmapBuffer(VADriverContextP ctx, VABufferID buf_id); +extern VAStatus i965_DestroySurfaces(VADriverContextP ctx, + VASurfaceID *surface_list, + int num_surfaces); + +extern VAStatus i965_CreateSurfaces(VADriverContextP ctx, + int width, + int height, + int format, + int num_surfaces, + VASurfaceID *surfaces); + #define I965_SURFACE_MEM_NATIVE 0 #define I965_SURFACE_MEM_GEM_FLINK 1 #define I965_SURFACE_MEM_DRM_PRIME 2 +void +i965_destroy_surface_storage(struct object_surface *obj_surface); + #endif /* _I965_DRV_VIDEO_H_ */ diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 4384619..f66d889 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -44,18 +44,6 @@ extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_con extern Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern Bool gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); -VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - 
static VAStatus intel_encoder_check_yuv_surface(VADriverContextP ctx, VAProfile profile, @@ -81,7 +69,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, if (!obj_surface || !obj_surface->bo) return VA_STATUS_ERROR_INVALID_PARAMETER; - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc == VA_FOURCC_NV12) { unsigned int tiling = 0, swizzle = 0; dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); @@ -116,7 +104,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, obj_surface = SURFACE(encoder_context->input_yuv_surface); encode_state->input_yuv_object = obj_surface; assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)obj_surface; dst_surface.type = I965_SURFACE_TYPE_SURFACE; @@ -135,6 +123,30 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, } static VAStatus +intel_encoder_check_misc_parameter(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + + if (encode_state->misc_param[VAEncMiscParameterTypeQualityLevel] && + encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer) { + VAEncMiscParameterBuffer* pMiscParam = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer; + VAEncMiscParameterBufferQualityLevel* param_quality_level = (VAEncMiscParameterBufferQualityLevel*)pMiscParam->data; + encoder_context->quality_level = param_quality_level->quality_level; + + if (encoder_context->quality_level == 0) + encoder_context->quality_level = ENCODER_DEFAULT_QUALITY; + else if (encoder_context->quality_level > encoder_context->quality_range) + goto error; + } + + return VA_STATUS_SUCCESS; + +error: + return VA_STATUS_ERROR_INVALID_PARAMETER; +} + +static VAStatus intel_encoder_check_avc_parameter(VADriverContextP 
ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) @@ -267,9 +279,11 @@ intel_encoder_sanity_check_input(VADriverContextP ctx, VAStatus vaStatus; switch (profile) { - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: + case VAProfileH264MultiviewHigh: + case VAProfileH264StereoHigh: vaStatus = intel_encoder_check_avc_parameter(ctx, encode_state, encoder_context); break; @@ -288,6 +302,9 @@ intel_encoder_sanity_check_input(VADriverContextP ctx, vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context); + if (vaStatus == VA_STATUS_SUCCESS) + vaStatus = intel_encoder_check_misc_parameter(ctx, encode_state, encoder_context); + out: return vaStatus; } @@ -345,11 +362,43 @@ intel_enc_hw_context_init(VADriverContextP ctx, encoder_context->input_yuv_surface = VA_INVALID_SURFACE; encoder_context->is_tmp_id = 0; encoder_context->rate_control_mode = VA_RC_NONE; - encoder_context->profile = obj_config->profile; + encoder_context->quality_level = ENCODER_DEFAULT_QUALITY; + encoder_context->quality_range = 1; + + switch (obj_config->profile) { + case VAProfileMPEG2Simple: + case VAProfileMPEG2Main: + encoder_context->codec = CODEC_MPEG2; + break; + + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + encoder_context->codec = CODEC_H264; + encoder_context->quality_range = ENCODER_QUALITY_RANGE; + break; + + case VAProfileH264StereoHigh: + case VAProfileH264MultiviewHigh: + encoder_context->codec = CODEC_H264_MVC; + break; + + default: + /* Never get here */ + assert(0); + break; + } for (i = 0; i < obj_config->num_attribs; i++) { if (obj_config->attrib_list[i].type == VAConfigAttribRateControl) { encoder_context->rate_control_mode = obj_config->attrib_list[i].value; + + if (encoder_context->codec == CODEC_MPEG2 && + encoder_context->rate_control_mode & VA_RC_CBR) { + WARN_ONCE("Don't support CBR for MPEG-2 
encoding\n"); + encoder_context->rate_control_mode &= ~VA_RC_CBR; + } + break; } } @@ -376,6 +425,7 @@ gen6_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) struct hw_context * gen7_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) { + return intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init); } @@ -384,3 +434,10 @@ gen75_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config { return intel_enc_hw_context_init(ctx, obj_config, gen75_vme_context_init, gen75_mfc_context_init); } + +struct hw_context * +gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) +{ + return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init); +} + diff --git a/src/i965_encoder.h b/src/i965_encoder.h index d9d6511..20d49fc 100644 --- a/src/i965_encoder.h +++ b/src/i965_encoder.h @@ -39,10 +39,12 @@ struct intel_encoder_context { struct hw_context base; - VAProfile profile; + int codec; VASurfaceID input_yuv_surface; int is_tmp_id; unsigned int rate_control_mode; + unsigned int quality_level; + unsigned int quality_range; void *vme_context; void *mfc_context; void (*vme_context_destroy)(void *vme_context); @@ -62,6 +64,8 @@ struct intel_encoder_context extern struct hw_context * gen75_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config); +extern struct hw_context * +gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config); #endif /* _I965_ENCODER_H_ */ diff --git a/src/i965_encoder_utils.c b/src/i965_encoder_utils.c index 7f6f768..abd25b4 100644 --- a/src/i965_encoder_utils.c +++ b/src/i965_encoder_utils.c @@ -233,13 +233,22 @@ slice_header(avc_bitstream *bs, /* slice type */ if (IS_P_SLICE(slice_param->slice_type)) { - avc_bitstream_put_ui(bs, 0, 1); /* num_ref_idx_active_override_flag: 0 */ + avc_bitstream_put_ui(bs, slice_param->num_ref_idx_active_override_flag, 1); /* 
num_ref_idx_active_override_flag: */ + + if (slice_param->num_ref_idx_active_override_flag) + avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l0_active_minus1); /* ref_pic_list_reordering */ avc_bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */ } else if (IS_B_SLICE(slice_param->slice_type)) { avc_bitstream_put_ui(bs, slice_param->direct_spatial_mv_pred_flag, 1); /* direct_spatial_mv_pred: 1 */ - avc_bitstream_put_ui(bs, 0, 1); /* num_ref_idx_active_override_flag: 0 */ + + avc_bitstream_put_ui(bs, slice_param->num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */ + + if (slice_param->num_ref_idx_active_override_flag) { + avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l0_active_minus1); + avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l1_active_minus1); + } /* ref_pic_list_reordering */ avc_bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */ @@ -298,6 +307,7 @@ build_avc_slice_header(VAEncSequenceParameterBufferH264 *sps_param, { avc_bitstream bs; int is_idr = !!pic_param->pic_fields.bits.idr_pic_flag; + int is_ref = !!pic_param->pic_fields.bits.reference_pic_flag; avc_bitstream_start(&bs); nal_start_code_prefix(&bs); @@ -305,10 +315,12 @@ build_avc_slice_header(VAEncSequenceParameterBufferH264 *sps_param, if (IS_I_SLICE(slice_param->slice_type)) { nal_header(&bs, NAL_REF_IDC_HIGH, is_idr ? NAL_IDR : NAL_NON_IDR); } else if (IS_P_SLICE(slice_param->slice_type)) { - nal_header(&bs, NAL_REF_IDC_MEDIUM, is_idr ? NAL_IDR : NAL_NON_IDR); + assert(!is_idr); + nal_header(&bs, NAL_REF_IDC_MEDIUM, NAL_NON_IDR); } else { assert(IS_B_SLICE(slice_param->slice_type)); - nal_header(&bs, NAL_REF_IDC_NONE, is_idr ? NAL_IDR : NAL_NON_IDR); + assert(!is_idr); + nal_header(&bs, is_ref ? 
NAL_REF_IDC_LOW : NAL_REF_IDC_NONE, NAL_NON_IDR); } slice_header(&bs, sps_param, pic_param, slice_param); diff --git a/src/i965_fourcc.h b/src/i965_fourcc.h new file mode 100644 index 0000000..3a9f120 --- /dev/null +++ b/src/i965_fourcc.h @@ -0,0 +1,68 @@ +#ifndef _I965_FOURCC_H_ +#define _I965_FOURCC_H_ + +#ifndef VA_FOURCC_YV16 +#define VA_FOURCC_YV16 VA_FOURCC('Y','V','1','6') +#endif + +#ifndef VA_FOURCC_I420 +#define VA_FOURCC_I420 VA_FOURCC('I','4','2','0') +#endif + +/* + * VA_FOURCC_IA44 is an exception because the va.h already + * defines the AI44 as VA_FOURCC('I', 'A', '4', '4'). + */ +#ifndef VA_FOURCC_IA44 +#define VA_FOURCC_IA44 VA_FOURCC('A','I','4','4') +#endif + +#ifndef VA_FOURCC_IA88 +#define VA_FOURCC_IA88 VA_FOURCC('I','A','8','8') +#endif + +#ifndef VA_FOURCC_AI88 +#define VA_FOURCC_AI88 VA_FOURCC('A','I','8','8') +#endif + +#ifndef VA_FOURCC_IMC1 +#define VA_FOURCC_IMC1 VA_FOURCC('I','M','C','1') +#endif + +#ifndef VA_FOURCC_YVY2 +#define VA_FOURCC_YVY2 VA_FOURCC('Y','V','Y','2') +#endif + +#define I965_MAX_PLANES 4 +#define I965_MAX_COMONENTS 4 + +#define I965_COLOR_YUV 0 +#define I965_COLOR_RGB 1 +#define I965_COLOR_INDEX 2 + +typedef struct { + uint8_t plane; /* the plane which the pixel belongs to */ + uint8_t offset; /* bits offset within a pixel in the plane */ +} i965_component_info; + +typedef struct { + uint32_t fourcc; /* fourcc */ + uint32_t format; /* 0: YUV, 1: RGB, 2: Indexed format */ + uint32_t subsampling; /* Sub sampling */ + uint8_t flag; /* 1: only supported by vaCreateSurfaces(), 2: only supported by vaCreateImage(), 3: both */ + uint8_t hfactor; /* horizontal sampling factor */ + uint8_t vfactor; /* vertical sampling factor */ + uint8_t num_planes; /* number of planes */ + uint8_t bpp[I965_MAX_PLANES]; /* bits per pixel within a plane */ + uint8_t num_components; /* number of components */ + /* + * Components in the array are ordered in Y, U, V, A (up to 4 components) + * for YUV formats, R, G, B, A (up to 4 components) 
for RGB formats and + * I, A (2 components) for indexed formats + */ + i965_component_info components[I965_MAX_COMONENTS]; +} i965_fourcc_info; + +extern const i965_fourcc_info *get_fourcc_info(unsigned int); + +#endif /* _I965_FOURCC_H_ */ diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index 4c6469b..3386b09 100644 --- a/src/i965_gpe_utils.c +++ b/src/i965_gpe_utils.c @@ -296,6 +296,44 @@ gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling } static void +gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss0.tiled_surface = 0; + ss->ss0.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss0.tiled_surface = 1; + ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +static void +gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + ss->ss2.tiled_surface = 0; + ss->ss2.tile_walk = 0; + break; + case I915_TILING_X: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + ss->ss2.tiled_surface = 1; + ss->ss2.tile_walk = I965_TILEWALK_YMAJOR; + break; + } +} + +static void i965_gpe_set_surface2_state(VADriverContextP ctx, struct object_surface *obj_surface, struct i965_surface_state2 *ss) @@ -304,7 +342,7 @@ i965_gpe_set_surface2_state(VADriverContextP ctx, unsigned int tiling, swizzle; assert(obj_surface->bo); - assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + assert(obj_surface->fourcc == VA_FOURCC_NV12); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); w = obj_surface->orig_width; @@ -467,7 +505,7 @@ gen7_gpe_set_surface2_state(VADriverContextP ctx, unsigned int tiling, swizzle; assert(obj_surface->bo); - assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + 
assert(obj_surface->fourcc == VA_FOURCC_NV12); dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); w = obj_surface->orig_width; @@ -610,7 +648,7 @@ gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx, dri_bo *bo; int cbcr_offset; - assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')); + assert(obj_surface->fourcc == VA_FOURCC_NV12); bo = gpe_context->surface_state_binding_table.bo; dri_bo_map(bo, True); assert(bo->virtual); @@ -677,3 +715,495 @@ gen7_gpe_buffer_suface_setup(VADriverContextP ctx, *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset; dri_bo_unmap(bo); } + +static void +gen8_gpe_set_surface2_state(VADriverContextP ctx, + struct object_surface *obj_surface, + struct gen8_surface_state2 *ss) +{ + int w, h, w_pitch; + unsigned int tiling, swizzle; + + assert(obj_surface->bo); + assert(obj_surface->fourcc == VA_FOURCC_NV12); + + dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); + w = obj_surface->orig_width; + h = obj_surface->orig_height; + w_pitch = obj_surface->width; + + memset(ss, 0, sizeof(*ss)); + /* ss0 */ + ss->ss6.base_addr = obj_surface->bo->offset; + /* ss1 */ + ss->ss1.cbcr_pixel_offset_v_direction = 2; + ss->ss1.width = w - 1; + ss->ss1.height = h - 1; + /* ss2 */ + ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8; + ss->ss2.interleave_chroma = 1; + ss->ss2.pitch = w_pitch - 1; + ss->ss2.half_pitch_for_chroma = 0; + gen8_gpe_set_surface2_tiling(ss, tiling); + /* ss3: UV offset for interleave mode */ + ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset; + ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset; +} + +void +gen8_gpe_surface2_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen8_surface_state2 *ss; + dri_bo *bo; + + bo = gpe_context->surface_state_binding_table.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + + ss = (struct 
gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset); + gen8_gpe_set_surface2_state(ctx, obj_surface, ss); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, 0, + 0, + surface_state_offset + offsetof(struct gen8_surface_state2, ss6), + obj_surface->bo); + + *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset; + dri_bo_unmap(bo); +} + +static void +gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx, + struct object_surface *obj_surface, + struct gen8_surface_state *ss) +{ + int w, h, w_pitch; + unsigned int tiling, swizzle; + + dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); + w = obj_surface->orig_width; + h = obj_surface->orig_height; + w_pitch = obj_surface->width; + + memset(ss, 0, sizeof(*ss)); + /* ss0 */ + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + /* ss1 */ + ss->ss8.base_addr = obj_surface->bo->offset; + /* ss2 */ + ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */ + ss->ss2.height = h - 1; + /* ss3 */ + ss->ss3.pitch = w_pitch - 1; + gen8_gpe_set_surface_tiling(ss, tiling); +} + +static void +gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx, + struct object_surface *obj_surface, + struct gen8_surface_state *ss) +{ + int w, h, w_pitch; + unsigned int tiling, swizzle; + int cbcr_offset; + + dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); + w = obj_surface->orig_width; + h = obj_surface->orig_height; + w_pitch = obj_surface->width; + + cbcr_offset = obj_surface->height * obj_surface->width; + memset(ss, 0, sizeof(*ss)); + /* ss0 */ + ss->ss0.surface_type = I965_SURFACE_2D; + ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; + /* ss1 */ + ss->ss8.base_addr = obj_surface->bo->offset + cbcr_offset; + /* ss2 */ + ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */ + ss->ss2.height = (obj_surface->height / 2) -1; + /* ss3 */ + ss->ss3.pitch = w_pitch - 1; + 
gen8_gpe_set_surface_tiling(ss, tiling); +} + +void +gen8_gpe_media_rw_surface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen8_surface_state *ss; + dri_bo *bo; + + bo = gpe_context->surface_state_binding_table.bo; + dri_bo_map(bo, True); + assert(bo->virtual); + + ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset); + gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, 0, + 0, + surface_state_offset + offsetof(struct gen8_surface_state, ss8), + obj_surface->bo); + + *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset; + dri_bo_unmap(bo); +} + +void +gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen8_surface_state *ss; + dri_bo *bo; + int cbcr_offset; + + assert(obj_surface->fourcc == VA_FOURCC_NV12); + bo = gpe_context->surface_state_binding_table.bo; + dri_bo_map(bo, True); + assert(bo->virtual); + + cbcr_offset = obj_surface->height * obj_surface->width; + ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset); + gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, 0, + cbcr_offset, + surface_state_offset + offsetof(struct gen8_surface_state, ss8), + obj_surface->bo); + + *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset; + dri_bo_unmap(bo); +} + + +static void +gen8_gpe_set_buffer_surface_state(VADriverContextP ctx, + struct i965_buffer_surface *buffer_surface, + struct gen8_surface_state *ss) +{ + int num_entries; + + assert(buffer_surface->bo); + num_entries = buffer_surface->num_blocks * 
buffer_surface->size_block / buffer_surface->pitch; + + memset(ss, 0, sizeof(*ss)); + /* ss0 */ + ss->ss0.surface_type = I965_SURFACE_BUFFER; + /* ss1 */ + ss->ss8.base_addr = buffer_surface->bo->offset; + /* ss2 */ + ss->ss2.width = ((num_entries - 1) & 0x7f); + ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff); + /* ss3 */ + ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f); + ss->ss3.pitch = buffer_surface->pitch - 1; +} + +void +gen8_gpe_buffer_suface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_buffer_surface *buffer_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen8_surface_state *ss; + dri_bo *bo; + + bo = gpe_context->surface_state_binding_table.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + + ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset); + gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0, + surface_state_offset + offsetof(struct gen8_surface_state, ss8), + buffer_surface->bo); + + *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset; + dri_bo_unmap(bo); +} + +static void +gen8_gpe_state_base_address(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + BEGIN_BATCH(batch, 16); + + OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 14); + + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + + /*DW4 Surface state base address */ + OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(batch, 0); + + /*DW6. 
Dynamic state base address */ + if (gpe_context->dynamic_state.bo) + OUT_RELOC(batch, gpe_context->dynamic_state.bo, + I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER, + 0, BASE_ADDRESS_MODIFY); + else + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + + OUT_BATCH(batch, 0); + + /*DW8. Indirect Object base address */ + if (gpe_context->indirect_state.bo) + OUT_RELOC(batch, gpe_context->indirect_state.bo, + I915_GEM_DOMAIN_SAMPLER, + 0, BASE_ADDRESS_MODIFY); + else + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + + OUT_BATCH(batch, 0); + + /*DW10. Instruct base address */ + if (gpe_context->instruction_state.bo) + OUT_RELOC(batch, gpe_context->instruction_state.bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, BASE_ADDRESS_MODIFY); + else + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); + + OUT_BATCH(batch, 0); + + /* DW12. Size limitation */ + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound + + /* + OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address + OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound + */ + + ADVANCE_BATCH(batch); +} + +static void +gen8_gpe_vfe_state(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + + BEGIN_BATCH(batch, 9); + + OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2)); + /* Scratch Space Base Pointer and Space */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + + OUT_BATCH(batch, + gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */ + gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */ + gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */ + OUT_BATCH(batch, 0); /* Debug: Object ID */ + OUT_BATCH(batch, + 
gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */ + gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */ + + /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */ + OUT_BATCH(batch, gpe_context->vfe_desc5.dword); + OUT_BATCH(batch, gpe_context->vfe_desc6.dword); + OUT_BATCH(batch, gpe_context->vfe_desc7.dword); + + ADVANCE_BATCH(batch); + +} + + +static void +gen8_gpe_curbe_load(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + BEGIN_BATCH(batch, 4); + + OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, gpe_context->curbe_size); + OUT_BATCH(batch, gpe_context->curbe_offset); + + ADVANCE_BATCH(batch); +} + +static void +gen8_gpe_idrt(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + BEGIN_BATCH(batch, 6); + + OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH); + OUT_BATCH(batch, 0); + + OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2)); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, gpe_context->idrt_size); + OUT_BATCH(batch, gpe_context->idrt_offset); + + ADVANCE_BATCH(batch); +} + + +void +gen8_gpe_pipeline_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch) +{ + intel_batchbuffer_emit_mi_flush(batch); + + i965_gpe_select(ctx, gpe_context, batch); + gen8_gpe_state_base_address(ctx, gpe_context, batch); + gen8_gpe_vfe_state(ctx, gpe_context, batch); + gen8_gpe_curbe_load(ctx, gpe_context, batch); + gen8_gpe_idrt(ctx, gpe_context, batch); +} + +void +gen8_gpe_context_init(VADriverContextP ctx, + struct i965_gpe_context *gpe_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + dri_bo *bo; + int bo_size; + unsigned int start_offset, end_offset; + + dri_bo_unreference(gpe_context->surface_state_binding_table.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + 
gpe_context->surface_state_binding_table.length, + 4096); + assert(bo); + gpe_context->surface_state_binding_table.bo = bo; + + bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192; + dri_bo_unreference(gpe_context->dynamic_state.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "surface state & binding table", + bo_size, + 4096); + assert(bo); + gpe_context->dynamic_state.bo = bo; + gpe_context->dynamic_state.bo_size = bo_size; + + end_offset = 0; + gpe_context->dynamic_state.end_offset = 0; + + /* Constant buffer offset */ + start_offset = ALIGN(end_offset, 64); + gpe_context->curbe_offset = start_offset; + end_offset = start_offset + gpe_context->curbe_size; + + /* Interface descriptor offset */ + start_offset = ALIGN(end_offset, 64); + gpe_context->idrt_offset = start_offset; + end_offset = start_offset + gpe_context->idrt_size; + + /* Sampler state offset */ + start_offset = ALIGN(end_offset, 64); + gpe_context->sampler_offset = start_offset; + end_offset = start_offset + gpe_context->sampler_size; + + /* update the end offset of dynamic_state */ + gpe_context->dynamic_state.end_offset = end_offset; +} + + +void +gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context) +{ + int i; + + dri_bo_unreference(gpe_context->surface_state_binding_table.bo); + gpe_context->surface_state_binding_table.bo = NULL; + + dri_bo_unreference(gpe_context->instruction_state.bo); + gpe_context->instruction_state.bo = NULL; + + dri_bo_unreference(gpe_context->dynamic_state.bo); + gpe_context->dynamic_state.bo = NULL; + + dri_bo_unreference(gpe_context->indirect_state.bo); + gpe_context->indirect_state.bo = NULL; + +} + + +void +gen8_gpe_load_kernels(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_kernel *kernel_list, + unsigned int num_kernels) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + int i, kernel_size; + unsigned int kernel_offset, end_offset; + unsigned char *kernel_ptr; + struct 
i965_kernel *kernel; + + assert(num_kernels <= MAX_GPE_KERNELS); + memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels); + gpe_context->num_kernels = num_kernels; + + kernel_size = num_kernels * 64; + for (i = 0; i < num_kernels; i++) { + kernel = &gpe_context->kernels[i]; + + kernel_size += kernel->size; + } + + gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr, + "kernel shader", + kernel_size, + 0x1000); + if (gpe_context->instruction_state.bo == NULL) { + WARN_ONCE("failure to allocate the buffer space for kernel shader\n"); + return; + } + + assert(gpe_context->instruction_state.bo); + + gpe_context->instruction_state.bo_size = kernel_size; + gpe_context->instruction_state.end_offset = 0; + end_offset = 0; + + dri_bo_map(gpe_context->instruction_state.bo, 1); + kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual); + for (i = 0; i < num_kernels; i++) { + kernel_offset = ALIGN(end_offset, 64); + kernel = &gpe_context->kernels[i]; + kernel->kernel_offset = kernel_offset; + + if (kernel->size) { + memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size); + + end_offset = kernel_offset + kernel->size; + } + } + + gpe_context->instruction_state.end_offset = end_offset; + + dri_bo_unmap(gpe_context->instruction_state.bo); + + return; +} + diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h index 72d7de8..2331152 100644 --- a/src/i965_gpe_utils.h +++ b/src/i965_gpe_utils.h @@ -114,6 +114,29 @@ struct i965_gpe_context unsigned int num_kernels; struct i965_kernel kernels[MAX_GPE_KERNELS]; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } instruction_state; + + struct { + dri_bo *bo; + } indirect_state; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } dynamic_state; + + unsigned int sampler_offset; + int sampler_size; + unsigned int idrt_offset; + int idrt_size; + unsigned int curbe_offset; + int curbe_size; }; void i965_gpe_context_destroy(struct 
i965_gpe_context *gpe_context); @@ -161,4 +184,39 @@ void gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx, struct object_surface *obj_surface, unsigned long binding_table_offset, unsigned long surface_state_offset); + +extern void gen8_gpe_surface2_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); +extern void gen8_gpe_media_rw_surface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); +extern void gen8_gpe_buffer_suface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_buffer_surface *buffer_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); +extern void gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct object_surface *obj_surface, + unsigned long binding_table_offset, + unsigned long surface_state_offset); + +void gen8_gpe_pipeline_setup(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct intel_batchbuffer *batch); + + +void gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context); +void gen8_gpe_context_init(VADriverContextP ctx, + struct i965_gpe_context *gpe_context); + +void gen8_gpe_load_kernels(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + struct i965_kernel *kernel_list, + unsigned int num_kernels); #endif /* _I965_GPE_UTILS_H_ */ diff --git a/src/i965_media.c b/src/i965_media.c index d734a8e..644104e 100644 --- a/src/i965_media.c +++ b/src/i965_media.c @@ -60,7 +60,7 @@ i965_media_urb_layout(VADriverContextP ctx, struct i965_media_context *media_con unsigned int vfe_fence, cs_fence; vfe_fence = media_context->urb.cs_start; - cs_fence = URB_SIZE((&i965->intel)); + cs_fence = i965->intel.device_info->urb_size; 
BEGIN_BATCH(batch, 3); OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1); @@ -77,7 +77,7 @@ i965_media_state_base_address(VADriverContextP ctx, struct i965_media_context *m struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = media_context->base.batch; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 8); OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); @@ -257,7 +257,7 @@ i965_media_decode_init(VADriverContextP ctx, i965_media_mpeg2_decode_init(ctx, decode_state, media_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: i965_media_h264_decode_init(ctx, decode_state, media_context); @@ -348,7 +348,7 @@ g4x_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) i965_media_mpeg2_dec_context_init(ctx, media_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: i965_media_h264_dec_context_init(ctx, media_context); @@ -381,7 +381,7 @@ ironlake_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_con i965_media_mpeg2_dec_context_init(ctx, media_context); break; - case VAProfileH264Baseline: + case VAProfileH264ConstrainedBaseline: case VAProfileH264Main: case VAProfileH264High: i965_media_h264_dec_context_init(ctx, media_context); diff --git a/src/i965_media_h264.c b/src/i965_media_h264.c index 93e13f6..cf95299 100644 --- a/src/i965_media_h264.c +++ b/src/i965_media_h264.c @@ -11,6 +11,7 @@ #include "i965_drv_video.h" #include "i965_media.h" #include "i965_media_h264.h" +#include "i965_decoder_utils.h" enum { INTRA_16X16 = 0, @@ -349,7 +350,7 @@ i965_media_h264_surfaces_setup(VADriverContextP ctx, struct object_surface *obj_surface; VAPictureParameterBufferH264 *pic_param; VAPictureH264 *va_pic; - int i, j, w, h; + int 
i, w, h; int field_picture; assert(media_context->private_context); @@ -381,24 +382,15 @@ i965_media_h264_surfaces_setup(VADriverContextP ctx, /* Reference Pictures */ for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) { - if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID && - i965_h264_context->fsid_list[i].obj_surface != NULL) { - int found = 0; - for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) { - va_pic = &pic_param->ReferenceFrames[j]; - - if (va_pic->flags & VA_PICTURE_H264_INVALID) - continue; - - if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) { - found = 1; - break; - } - } - - assert(found == 1); - - obj_surface = i965_h264_context->fsid_list[i].obj_surface; + struct object_surface * const obj_surface = + i965_h264_context->fsid_list[i].obj_surface; + + if (obj_surface) { + const VAPictureH264 * const va_pic = avc_find_picture( + obj_surface->base.id, pic_param->ReferenceFrames, + ARRAY_ELEMS(pic_param->ReferenceFrames)); + + assert(va_pic != NULL); w = obj_surface->width; h = obj_surface->height; field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD)); @@ -919,7 +911,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context sizeof(h264_avc_kernels_gen5[0]))); assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) / sizeof(avc_mc_kernel_offset_gen5[0]))); - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels)); avc_mc_kernel_offset = avc_mc_kernel_offset_gen5; intra_kernel_header = &intra_kernel_header_gen5; @@ -953,7 +945,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context media_context->free_private_context = i965_media_h264_free_private_context; /* URB */ - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { 
media_context->urb.num_vfe_entries = 63; } else { media_context->urb.num_vfe_entries = 23; @@ -968,7 +960,7 @@ i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context media_context->urb.cs_start = media_context->urb.vfe_start + media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry; assert(media_context->urb.cs_start + - media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); /* hook functions */ media_context->media_states_setup = i965_media_h264_states_setup; diff --git a/src/i965_media_h264.h b/src/i965_media_h264.h index 490213c..e507e1d 100644 --- a/src/i965_media_h264.h +++ b/src/i965_media_h264.h @@ -61,6 +61,7 @@ struct i965_h264_context struct i965_avc_hw_scoreboard_context avc_hw_scoreboard_context; struct i965_avc_ildb_context avc_ildb_context; + GenFrameStoreContext fs_ctx; GenFrameStore fsid_list[MAX_GEN_REFERENCE_FRAMES]; struct i965_kernel avc_kernels[NUM_H264_AVC_KERNELS]; diff --git a/src/i965_media_mpeg2.c b/src/i965_media_mpeg2.c index 1c105b3..245c8e7 100644 --- a/src/i965_media_mpeg2.c +++ b/src/i965_media_mpeg2.c @@ -515,7 +515,7 @@ i965_media_mpeg2_surface_setup(VADriverContextP ctx, int w = obj_surface->width; int h = obj_surface->height; - i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('I','4','2','0'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_I420, SUBSAMPLE_YUV420); if (picture_structure == MPEG_FRAME) { i965_media_mpeg2_surface_state(ctx, base_index + 0, obj_surface, @@ -988,7 +988,7 @@ i965_media_mpeg2_dec_context_init(VADriverContextP ctx, struct i965_media_contex sizeof(mpeg2_vld_kernels_gen5[0]))); assert(NUM_MPEG2_VLD_KERNELS <= MAX_INTERFACE_DESC); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) memcpy(i965_mpeg2_context->vld_kernels, 
mpeg2_vld_kernels_gen5, sizeof(i965_mpeg2_context->vld_kernels)); else memcpy(i965_mpeg2_context->vld_kernels, mpeg2_vld_kernels_gen4, sizeof(i965_mpeg2_context->vld_kernels)); @@ -1013,7 +1013,7 @@ i965_media_mpeg2_dec_context_init(VADriverContextP ctx, struct i965_media_contex media_context->urb.cs_start = media_context->urb.vfe_start + media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry; assert(media_context->urb.cs_start + - media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); /* hook functions */ media_context->media_states_setup = i965_media_mpeg2_states_setup; diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c index de7be92..6f8ea31 100644 --- a/src/i965_output_dri.c +++ b/src/i965_output_dri.c @@ -127,6 +127,7 @@ i965_put_surface_dri( bool new_region = false; uint32_t name; int i, ret; + unsigned int color_flag = 0; /* Currently don't support DRI1 */ if (!VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2)) @@ -136,8 +137,7 @@ i965_put_surface_dri( * will get here */ obj_surface = SURFACE(surface); - if (!obj_surface || !obj_surface->bo) - return VA_STATUS_SUCCESS; + ASSERT_RET(obj_surface && obj_surface->bo, VA_STATUS_SUCCESS); _i965LockMutex(&i965->render_mutex); @@ -179,6 +179,12 @@ i965_put_surface_dri( assert(ret == 0); } + color_flag = flags & VA_SRC_COLOR_MASK; + if (color_flag == 0) + color_flag = VA_SRC_BT601; + + pp_flag = color_flag; + if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC) pp_flag |= I965_PP_FLAG_AVS; @@ -197,17 +203,8 @@ i965_put_surface_dri( } } - dri_vtable->swap_buffer(ctx, dri_drawable); - obj_surface->flags |= SURFACE_DISPLAYED; - - if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) { - dri_bo_unreference(obj_surface->bo); - obj_surface->bo = NULL; - obj_surface->flags &= ~SURFACE_REF_DIS_MASK; - - if 
(obj_surface->free_private_data) - obj_surface->free_private_data(&obj_surface->private_data); - } + if (!(g_intel_debug_option_flags & VA_INTEL_DEBUG_OPTION_BENCH)) + dri_vtable->swap_buffer(ctx, dri_drawable); _i965UnlockMutex(&i965->render_mutex); diff --git a/src/i965_output_wayland.c b/src/i965_output_wayland.c index be7f32c..5a75397 100644 --- a/src/i965_output_wayland.c +++ b/src/i965_output_wayland.c @@ -237,7 +237,7 @@ va_GetSurfaceBufferWl( return VA_STATUS_ERROR_INVALID_SURFACE; switch (obj_surface->fourcc) { - case VA_FOURCC('N','V','1','2'): + case VA_FOURCC_NV12: drm_format = WL_DRM_FORMAT_NV12; offsets[0] = 0; pitches[0] = obj_surface->width; @@ -246,10 +246,14 @@ va_GetSurfaceBufferWl( offsets[2] = 0; pitches[2] = 0; break; - case VA_FOURCC('Y','V','1','2'): - case VA_FOURCC('I','4','2','0'): - case VA_FOURCC('I','M','C','1'): - case VA_FOURCC('I','M','C','3'): + case VA_FOURCC_YV12: + case VA_FOURCC_I420: + case VA_FOURCC_IMC1: + case VA_FOURCC_IMC3: + case VA_FOURCC_422H: + case VA_FOURCC_422V: + case VA_FOURCC_411P: + case VA_FOURCC_444P: switch (obj_surface->subsampling) { case SUBSAMPLE_YUV411: drm_format = WL_DRM_FORMAT_YUV411; diff --git a/src/i965_pciids.h b/src/i965_pciids.h new file mode 100644 index 0000000..fc046d1 --- /dev/null +++ b/src/i965_pciids.h @@ -0,0 +1,135 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Copied and modified from (mesa) include/pci_ids/i965_pci_ids.h + */ + +CHIPSET(0x2A42, g4x, g4x, "Intel(R) GM45 Express Chipset") +CHIPSET(0x2E02, g4x, g4x, "Intel(R) Integrated Graphics Device") +CHIPSET(0x2E12, g4x, g4x, "Intel(R) Q45/Q43") +CHIPSET(0x2E22, g4x, g4x, "Intel(R) G45/G43") +CHIPSET(0x2E32, g4x, g4x, "Intel(R) G41") +CHIPSET(0x2E42, g4x, g4x, "Intel(R) B43") +CHIPSET(0x2E92, g4x, g4x, "Intel(R) B43") +CHIPSET(0x0042, ilk, ilk, "Intel(R) Ironlake Desktop") +CHIPSET(0x0046, ilk, ilk, "Intel(R) Ironlake Mobile") +CHIPSET(0x0102, snb, snb_gt1, "Intel(R) Sandybridge Desktop") +CHIPSET(0x0112, snb, snb_gt2, "Intel(R) Sandybridge Desktop") +CHIPSET(0x0122, snb, snb_gt2, "Intel(R) Sandybridge Desktop") +CHIPSET(0x0106, snb, snb_gt1, "Intel(R) Sandybridge Mobile") +CHIPSET(0x0116, snb, snb_gt2, "Intel(R) Sandybridge Mobile") +CHIPSET(0x0126, snb, snb_gt2, "Intel(R) Sandybridge Mobile") +CHIPSET(0x010A, snb, snb_gt1, "Intel(R) Sandybridge Server") +CHIPSET(0x0152, ivb, ivb_gt1, "Intel(R) Ivybridge Desktop") +CHIPSET(0x0162, ivb, ivb_gt2, "Intel(R) Ivybridge Desktop") +CHIPSET(0x0156, ivb, ivb_gt1, "Intel(R) Ivybridge Mobile") +CHIPSET(0x0166, ivb, ivb_gt2, "Intel(R) Ivybridge Mobile") +CHIPSET(0x015A, ivb, ivb_gt1, "Intel(R) Ivybridge Server") +CHIPSET(0x016A, ivb, ivb_gt2, "Intel(R) Ivybridge Server") +CHIPSET(0x0F31, ivb, byt, "Intel(R) Bay Trail") +CHIPSET(0x0F32, ivb, byt, "Intel(R) Bay Trail") +CHIPSET(0x0F33, ivb, byt, "Intel(R) Bay Trail") 
+CHIPSET(0x0157, ivb, byt, "Intel(R) Bay Trail") +CHIPSET(0x0155, ivb, byt, "Intel(R) Bay Trail") +CHIPSET(0x0402, hsw, hsw_gt1, "Intel(R) Haswell Desktop") +CHIPSET(0x0412, hsw, hsw_gt2, "Intel(R) Haswell Desktop") +CHIPSET(0x0422, hsw, hsw_gt3, "Intel(R) Haswell Desktop") +CHIPSET(0x0406, hsw, hsw_gt1, "Intel(R) Haswell Mobile") +CHIPSET(0x0416, hsw, hsw_gt2, "Intel(R) Haswell Mobile") +CHIPSET(0x0426, hsw, hsw_gt3, "Intel(R) Haswell Mobile") +CHIPSET(0x040A, hsw, hsw_gt1, "Intel(R) Haswell Server") +CHIPSET(0x041A, hsw, hsw_gt2, "Intel(R) Haswell Server") +CHIPSET(0x042A, hsw, hsw_gt3, "Intel(R) Haswell Server") +CHIPSET(0x040B, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x041B, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x042B, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x040E, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x041E, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x042E, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0C02, hsw, hsw_gt1, "Intel(R) Haswell Desktop") +CHIPSET(0x0C12, hsw, hsw_gt2, "Intel(R) Haswell Desktop") +CHIPSET(0x0C22, hsw, hsw_gt3, "Intel(R) Haswell Desktop") +CHIPSET(0x0C06, hsw, hsw_gt1, "Intel(R) Haswell Mobile") +CHIPSET(0x0C16, hsw, hsw_gt2, "Intel(R) Haswell Mobile") +CHIPSET(0x0C26, hsw, hsw_gt3, "Intel(R) Haswell Mobile") +CHIPSET(0x0C0A, hsw, hsw_gt1, "Intel(R) Haswell Server") +CHIPSET(0x0C1A, hsw, hsw_gt2, "Intel(R) Haswell Server") +CHIPSET(0x0C2A, hsw, hsw_gt3, "Intel(R) Haswell Server") +CHIPSET(0x0C0B, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0C1B, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0C2B, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0C0E, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0C1E, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0C2E, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0A02, hsw, hsw_gt1, "Intel(R) Haswell Desktop") +CHIPSET(0x0A12, hsw, hsw_gt2, "Intel(R) Haswell Desktop") +CHIPSET(0x0A22, hsw, hsw_gt3, "Intel(R) Haswell Desktop") +CHIPSET(0x0A06, hsw, hsw_gt1, "Intel(R) Haswell Mobile") 
+CHIPSET(0x0A16, hsw, hsw_gt2, "Intel(R) Haswell Mobile") +CHIPSET(0x0A26, hsw, hsw_gt3, "Intel(R) Haswell Mobile") +CHIPSET(0x0A0A, hsw, hsw_gt1, "Intel(R) Haswell Server") +CHIPSET(0x0A1A, hsw, hsw_gt2, "Intel(R) Haswell Server") +CHIPSET(0x0A2A, hsw, hsw_gt3, "Intel(R) Haswell Server") +CHIPSET(0x0A0B, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0A1B, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0A2B, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0A0E, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0A1E, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0A2E, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0D02, hsw, hsw_gt1, "Intel(R) Haswell Desktop") +CHIPSET(0x0D12, hsw, hsw_gt2, "Intel(R) Haswell Desktop") +CHIPSET(0x0D22, hsw, hsw_gt3, "Intel(R) Haswell Desktop") +CHIPSET(0x0D06, hsw, hsw_gt1, "Intel(R) Haswell Mobile") +CHIPSET(0x0D16, hsw, hsw_gt2, "Intel(R) Haswell Mobile") +CHIPSET(0x0D26, hsw, hsw_gt3, "Intel(R) Haswell Mobile") +CHIPSET(0x0D0A, hsw, hsw_gt1, "Intel(R) Haswell Server") +CHIPSET(0x0D1A, hsw, hsw_gt2, "Intel(R) Haswell Server") +CHIPSET(0x0D2A, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0D0B, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0D1B, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0D2B, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x0D0E, hsw, hsw_gt1, "Intel(R) Haswell") +CHIPSET(0x0D1E, hsw, hsw_gt2, "Intel(R) Haswell") +CHIPSET(0x0D2E, hsw, hsw_gt3, "Intel(R) Haswell") +CHIPSET(0x1602, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x1606, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x160A, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x160B, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x160D, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x160E, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x1612, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x1616, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x161A, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x161B, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x161D, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x161E, bdw, bdw, "Intel(R) 
Broadwell") +CHIPSET(0x1622, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x1626, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x162A, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x162B, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x162D, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x162E, bdw, bdw, "Intel(R) Broadwell") +CHIPSET(0x22B0, chv, chv, "Intel(R) CherryView") +CHIPSET(0x22B1, chv, chv, "Intel(R) CherryView") +CHIPSET(0x22B2, chv, chv, "Intel(R) CherryView") +CHIPSET(0x22B3, chv, chv, "Intel(R) CherryView") diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c index e91dc03..6d435a8 100755 --- a/src/i965_post_processing.c +++ b/src/i965_post_processing.c @@ -40,19 +40,16 @@ #include "i965_render.h" #include "intel_media.h" -#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) || \ - IS_GEN6((ctx)->intel.device_id) || \ - IS_GEN7((ctx)->intel.device_id)) +extern VAStatus +vpp_surface_convert(VADriverContextP ctx, + struct object_surface *src_obj_surf, + struct object_surface *dst_obj_surf); -#define SURFACE_STATE_PADDED_SIZE_0_I965 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_I965 ALIGN(sizeof(struct i965_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_I965 MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965) +#define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp) -#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\ + MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET 
SURFACE_STATE_OFFSET(MAX_PP_SURFACES) @@ -60,6 +57,8 @@ #define GPU_ASM_BLOCK_HEIGHT 8 #define GPU_ASM_X_OFFSET_ALIGNMENT 4 +#define VA_STATUS_SUCCESS_1 0xFFFFFFFE + static const uint32_t pp_null_gen5[][4] = { #include "shaders/post_processing/gen5_6/null.g4b.gen5" }; @@ -112,6 +111,10 @@ static const uint32_t pp_pa_load_save_pl3_gen5[][4] = { #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5" }; +static const uint32_t pp_pa_load_save_pa_gen5[][4] = { +#include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5" +}; + static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = { #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5" }; @@ -322,6 +325,18 @@ static struct pp_module pp_modules_gen5[] = { { { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen5, + sizeof(pp_pa_load_save_pa_gen5), + NULL, + }, + + pp_plx_load_save_plx_initialize, + }, + + { + { "RGBX_NV12 module", PP_RGBX_LOAD_SAVE_NV12, pp_rgbx_load_save_nv12_gen5, @@ -397,6 +412,10 @@ static const uint32_t pp_pa_load_save_pl3_gen6[][4] = { #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b" }; +static const uint32_t pp_pa_load_save_pa_gen6[][4] = { +#include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b" +}; + static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = { #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b" }; @@ -560,7 +579,19 @@ static struct pp_module pp_modules_gen6[] = { pp_plx_load_save_plx_initialize, }, - + + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen6, + sizeof(pp_pa_load_save_pa_gen6), + NULL, + }, + + pp_plx_load_save_plx_initialize, + }, + { { "RGBX_NV12 module", @@ -632,6 +663,9 @@ static const uint32_t pp_pa_load_save_nv12_gen7[][4] = { static const uint32_t pp_pa_load_save_pl3_gen7[][4] = { #include "shaders/post_processing/gen7/pa_to_pl3.g7b" }; +static const uint32_t pp_pa_load_save_pa_gen7[][4] = { +#include "shaders/post_processing/gen7/pa_to_pa.g7b" +}; static 
const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = { #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b" }; @@ -658,13 +692,6 @@ static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_pos const VARectangle *dst_rect, void *filter_param); -static VAStatus gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - const struct i965_surface *src_surface, - const VARectangle *src_rect, - struct i965_surface *dst_surface, - const VARectangle *dst_rect, - void *filter_param); - static struct pp_module pp_modules_gen7[] = { { { @@ -820,7 +847,19 @@ static struct pp_module pp_modules_gen7[] = { gen7_pp_plx_avs_initialize, }, - + + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen7, + sizeof(pp_pa_load_save_pa_gen7), + NULL, + }, + + gen7_pp_plx_avs_initialize, + }, + { { "RGBX_NV12 module", @@ -830,7 +869,7 @@ static struct pp_module pp_modules_gen7[] = { NULL, }, - gen7_pp_rgbx_avs_initialize, + gen7_pp_plx_avs_initialize, }, { @@ -893,6 +932,9 @@ static const uint32_t pp_pa_load_save_nv12_gen75[][4] = { static const uint32_t pp_pa_load_save_pl3_gen75[][4] = { #include "shaders/post_processing/gen7/pa_to_pl3.g75b" }; +static const uint32_t pp_pa_load_save_pa_gen75[][4] = { +#include "shaders/post_processing/gen7/pa_to_pa.g75b" +}; static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = { #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b" }; @@ -994,7 +1036,7 @@ static struct pp_module pp_modules_gen75[] = { NULL, }, - gen7_pp_nv12_dndi_initialize, + gen7_pp_nv12_dn_initialize, }, { @@ -1056,7 +1098,19 @@ static struct pp_module pp_modules_gen75[] = { gen7_pp_plx_avs_initialize, }, - + + { + { + "PA_PA module", + PP_PA_LOAD_SAVE_PA, + pp_pa_load_save_pa_gen75, + sizeof(pp_pa_load_save_pa_gen75), + NULL, + }, + + gen7_pp_plx_avs_initialize, + }, + { { "RGBX_NV12 module", @@ -1066,7 +1120,7 @@ static struct pp_module pp_modules_gen75[] = { NULL, }, - gen7_pp_rgbx_avs_initialize, 
+ gen7_pp_plx_avs_initialize, }, { @@ -1100,6 +1154,22 @@ pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface) } static void +pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height) +{ + if (surface->type == I965_SURFACE_TYPE_IMAGE) { + struct object_image *obj_image = (struct object_image *)surface->base; + + *width = obj_image->image.width; + *height = obj_image->image.height; + } else { + struct object_surface *obj_surface = (struct object_surface *)surface->base; + + *width = obj_surface->orig_width; + *height = obj_surface->orig_height; + } +} + +static void pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling) { switch (tiling) { @@ -1358,8 +1428,8 @@ ironlake_pp_object_walker(VADriverContextP ctx, int x, x_steps, y, y_steps; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; - x_steps = pp_context->pp_x_steps(&pp_context->private_context); - y_steps = pp_context->pp_y_steps(&pp_context->private_context); + x_steps = pp_context->pp_x_steps(pp_context->private_context); + y_steps = pp_context->pp_y_steps(pp_context->private_context); for (y = 0; y < y_steps; y++) { for (x = 0; x < x_steps; x++) { @@ -1408,20 +1478,20 @@ static void i965_update_src_surface_static_parameter( int fourcc = pp_get_surface_fourcc(ctx, surface); switch (fourcc) { - case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC_YUY2: pp_static_parameter->grf1.source_packed_u_offset = 1; pp_static_parameter->grf1.source_packed_v_offset = 3; break; - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_UYVY: pp_static_parameter->grf1.source_packed_y_offset = 1; pp_static_parameter->grf1.source_packed_v_offset = 2; break; - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): + case VA_FOURCC_BGRX: + case VA_FOURCC_BGRA: pp_static_parameter->grf1.source_rgb_layout = 0; break; - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): + 
case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: pp_static_parameter->grf1.source_rgb_layout = 1; break; default: @@ -1439,20 +1509,20 @@ static void i965_update_dst_surface_static_parameter( int fourcc = pp_get_surface_fourcc(ctx, surface); switch (fourcc) { - case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC_YUY2: pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1; pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3; break; - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_UYVY: pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1; pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2; break; - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('B', 'G', 'R', 'A'): + case VA_FOURCC_BGRX: + case VA_FOURCC_BGRA: pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0; break; - case VA_FOURCC('R', 'G', 'B', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): + case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1; break; default: @@ -1563,7 +1633,7 @@ gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_cont ss->ss2.height = height - 1; ss->ss3.pitch = pitch - 1; gen7_pp_set_surface_tiling(ss, tiling); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) gen7_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0, @@ -1625,15 +1695,19 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); const int Y = 0; - const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1; - const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2; + const int U = ((fourcc == VA_FOURCC_YV12) || + (fourcc == VA_FOURCC_YV16)) + ? 2 : 1; + const int V = ((fourcc == VA_FOURCC_YV12) || + (fourcc == VA_FOURCC_YV16)) + ? 
1 : 2; const int UV = 1; - int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); - int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); - int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || - fourcc == VA_FOURCC('R', 'G', 'B', 'X') || - fourcc == VA_FOURCC('B', 'G', 'R', 'A') || - fourcc == VA_FOURCC('B', 'G', 'R', 'X')); + int interleaved_uv = fourcc == VA_FOURCC_NV12; + int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY); + int full_packed_format = (fourcc == VA_FOURCC_RGBA || + fourcc == VA_FOURCC_RGBX || + fourcc == VA_FOURCC_BGRA || + fourcc == VA_FOURCC_BGRX); int scale_factor_of_1st_plane_width_in_byte = 1; if (surface->type == I965_SURFACE_TYPE_SURFACE) { @@ -1646,11 +1720,9 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin if (full_packed_format) { scale_factor_of_1st_plane_width_in_byte = 4; - pitch[0] = obj_surface->width * 4; } else if (packed_yuv ) { scale_factor_of_1st_plane_width_in_byte = 2; - pitch[0] = obj_surface->width * 2; } else if (interleaved_uv) { width[1] = obj_surface->orig_width; @@ -1695,6 +1767,12 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin height[2] = obj_image->image.height / 2; pitch[2] = obj_image->image.pitches[2]; offset[2] = obj_image->image.offsets[2]; + if (fourcc == VA_FOURCC_YV16) { + width[1] = obj_image->image.width / 2; + height[1] = obj_image->image.height; + width[2] = obj_image->image.width / 2; + height[2] = obj_image->image.height; + } } } @@ -1731,84 +1809,73 @@ static void gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context, const struct i965_surface *surface, int base_index, int is_target, + const VARectangle *rect, int *width, int *height, int *pitch, int *offset) { struct object_surface *obj_surface; struct object_image *obj_image; dri_bo *bo; int fourcc = pp_get_surface_fourcc(ctx, surface); 
- const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1; - const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2; - int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2'); - int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); - int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || - fourcc == VA_FOURCC('R', 'G', 'B', 'X') || - fourcc == VA_FOURCC('B', 'G', 'R', 'A') || - fourcc == VA_FOURCC('B', 'G', 'R', 'X')); + const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc); + + if (fourcc_info == NULL) + return; if (surface->type == I965_SURFACE_TYPE_SURFACE) { obj_surface = (struct object_surface *)surface->base; bo = obj_surface->bo; - width[0] = obj_surface->orig_width; - height[0] = obj_surface->orig_height; + width[0] = MIN(rect->x + rect->width, obj_surface->orig_width); + height[0] = MIN(rect->y + rect->height, obj_surface->orig_height); pitch[0] = obj_surface->width; offset[0] = 0; - if (packed_yuv) { - if (is_target) - width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */ - else - width[0] = obj_surface->orig_width; /* surface foramt is YCBCR, width is specified in units of pixels */ - - pitch[0] = obj_surface->width * 2; - } else if (rgbx_format) { - if (is_target) - width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */ - pitch[0] = obj_surface->width * 4; - } + if (fourcc_info->num_planes == 1 && is_target) + width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ - width[1] = obj_surface->cb_cr_width; - height[1] = obj_surface->cb_cr_height; + width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width); + height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height); pitch[1] = 
obj_surface->cb_cr_pitch; offset[1] = obj_surface->y_cb_offset * obj_surface->width; - width[2] = obj_surface->cb_cr_width; - height[2] = obj_surface->cb_cr_height; + width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width); + height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height); pitch[2] = obj_surface->cb_cr_pitch; offset[2] = obj_surface->y_cr_offset * obj_surface->width; } else { + int U = 0, V = 0; + + /* FIXME: add support for ARGB/ABGR image */ obj_image = (struct object_image *)surface->base; bo = obj_image->bo; - width[0] = obj_image->image.width; - height[0] = obj_image->image.height; + width[0] = MIN(rect->x + rect->width, obj_image->image.width); + height[0] = MIN(rect->y + rect->height, obj_image->image.height); pitch[0] = obj_image->image.pitches[0]; offset[0] = obj_image->image.offsets[0]; - if (rgbx_format) { - if (is_target) - width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */ - } else if (packed_yuv) { + if (fourcc_info->num_planes == 1) { if (is_target) - width[0] = obj_image->image.width * 2; /* surface format is R8, so double the width */ - else - width[0] = obj_image->image.width; /* surface foramt is YCBCR, width is specified in units of pixels */ - } else if (interleaved_uv) { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height / 2; - pitch[1] = obj_image->image.pitches[1]; - offset[1] = obj_image->image.offsets[1]; + width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */ + } else if (fourcc_info->num_planes == 2) { + U = 1, V = 1; } else { - width[1] = obj_image->image.width / 2; - height[1] = obj_image->image.height / 2; - pitch[1] = obj_image->image.pitches[U]; - offset[1] = obj_image->image.offsets[U]; - width[2] = obj_image->image.width / 2; - height[2] = obj_image->image.height / 2; - pitch[2] = obj_image->image.pitches[V]; - offset[2] = 
obj_image->image.offsets[V]; + assert(fourcc_info->num_components == 3); + + U = fourcc_info->components[1].plane; + V = fourcc_info->components[2].plane; + assert((U == 1 && V == 2) || + (U == 2 && V == 1)); } + + /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */ + width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor); + height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor); + pitch[1] = obj_image->image.pitches[U]; + offset[1] = obj_image->image.offsets[U]; + + width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor); + height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor); + pitch[2] = obj_image->image.pitches[V]; + offset[2] = obj_image->image.offsets[V]; } if (is_target) { @@ -1817,61 +1884,63 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc width[0] / 4, height[0], pitch[0], I965_SURFACEFORMAT_R8_UINT, base_index, 1); - if (rgbx_format) { - struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - /* the format is MSB: X-B-G-R */ - pp_static_parameter->grf2.save_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || - (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { - /* It is stored as MSB: X-R-G-B */ - pp_static_parameter->grf2.save_avs_rgb_swap = 1; - } - } - if (!packed_yuv && !rgbx_format) { - if (interleaved_uv) { - gen7_pp_set_surface_state(ctx, pp_context, - bo, offset[1], - width[1] / 2, height[1], pitch[1], - I965_SURFACEFORMAT_R8G8_SINT, - base_index + 1, 1); - } else { - gen7_pp_set_surface_state(ctx, pp_context, - bo, offset[1], - width[1] / 4, height[1], pitch[1], - I965_SURFACEFORMAT_R8_SINT, - base_index + 1, 1); - 
gen7_pp_set_surface_state(ctx, pp_context, - bo, offset[2], - width[2] / 4, height[2], pitch[2], - I965_SURFACEFORMAT_R8_SINT, - base_index + 2, 1); + + if (fourcc_info->num_planes == 2) { + gen7_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 2, height[1], pitch[1], + I965_SURFACEFORMAT_R8G8_SINT, + base_index + 1, 1); + } else if (fourcc_info->num_planes == 3) { + gen7_pp_set_surface_state(ctx, pp_context, + bo, offset[1], + width[1] / 4, height[1], pitch[1], + I965_SURFACEFORMAT_R8_SINT, + base_index + 1, 1); + gen7_pp_set_surface_state(ctx, pp_context, + bo, offset[2], + width[2] / 4, height[2], pitch[2], + I965_SURFACEFORMAT_R8_SINT, + base_index + 2, 1); + } + + if (fourcc_info->format == I965_COLOR_RGB) { + struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; + /* the format is MSB: X-B-G-R */ + pp_static_parameter->grf2.save_avs_rgb_swap = 0; + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { + /* It is stored as MSB: X-R-G-B */ + pp_static_parameter->grf2.save_avs_rgb_swap = 1; } } } else { int format0 = SURFACE_FORMAT_Y8_UNORM; switch (fourcc) { - case VA_FOURCC('Y', 'U', 'Y', '2'): + case VA_FOURCC_YUY2: format0 = SURFACE_FORMAT_YCRCB_NORMAL; break; - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_UYVY: format0 = SURFACE_FORMAT_YCRCB_SWAPY; break; default: break; } - if (rgbx_format) { + + if (fourcc_info->format == I965_COLOR_RGB) { struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */ format0 = SURFACE_FORMAT_R8G8B8A8_UNORM; pp_static_parameter->grf2.src_avs_rgb_swap = 0; - if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || - (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) { + if ((fourcc == VA_FOURCC_BGRA) || + (fourcc == VA_FOURCC_BGRX)) { pp_static_parameter->grf2.src_avs_rgb_swap = 1; } } + gen7_pp_set_surface2_state(ctx, pp_context, bo, offset[0], width[0], height[0], pitch[0], @@ -1879,28 
+1948,26 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc format0, 0, base_index); - if (!packed_yuv && !rgbx_format) { - if (interleaved_uv) { - gen7_pp_set_surface2_state(ctx, pp_context, - bo, offset[1], - width[1], height[1], pitch[1], - 0, 0, - SURFACE_FORMAT_R8B8_UNORM, 0, - base_index + 1); - } else { - gen7_pp_set_surface2_state(ctx, pp_context, - bo, offset[1], - width[1], height[1], pitch[1], - 0, 0, - SURFACE_FORMAT_R8_UNORM, 0, - base_index + 1); - gen7_pp_set_surface2_state(ctx, pp_context, - bo, offset[2], - width[2], height[2], pitch[2], - 0, 0, - SURFACE_FORMAT_R8_UNORM, 0, - base_index + 2); - } + if (fourcc_info->num_planes == 2) { + gen7_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8B8_UNORM, 0, + base_index + 1); + } else if (fourcc_info->num_planes == 3) { + gen7_pp_set_surface2_state(ctx, pp_context, + bo, offset[1], + width[1], height[1], pitch[1], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 1); + gen7_pp_set_surface2_state(ctx, pp_context, + bo, offset[2], + width[2], height[2], pitch[2], + 0, 0, + SURFACE_FORMAT_R8_UNORM, 0, + base_index + 2); } } } @@ -1934,6 +2001,7 @@ pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp /* private function & data */ pp_context->pp_x_steps = pp_null_x_steps; pp_context->pp_y_steps = pp_null_y_steps; + pp_context->private_context = NULL; pp_context->pp_set_block_parameter = pp_null_set_block_parameter; dst_surface->flags = src_surface->flags; @@ -1959,7 +2027,7 @@ static int pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) { struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; - struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context; + struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context 
*)pp_context->private_context; pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x; pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y; @@ -2008,7 +2076,7 @@ pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processin const VARectangle *dst_rect, void *filter_param) { - struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context; + struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; int width[3], height[3], pitch[3], offset[3]; @@ -2024,6 +2092,7 @@ pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processin /* private function & data */ pp_context->pp_x_steps = pp_load_save_x_steps; pp_context->pp_y_steps = pp_load_save_y_steps; + pp_context->private_context = &pp_context->pp_load_save_context; pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter; int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;; @@ -2064,7 +2133,7 @@ pp_scaling_y_steps(void *private_context) static int pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) { - struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context; + struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step; @@ -2086,7 +2155,7 @@ pp_nv12_scaling_initialize(VADriverContextP ctx, struct 
i965_post_processing_con const VARectangle *dst_rect, void *filter_param) { - struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context; + struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct object_surface *obj_surface; @@ -2156,6 +2225,7 @@ pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_con /* private function & data */ pp_context->pp_x_steps = pp_scaling_x_steps; pp_context->pp_y_steps = pp_scaling_y_steps; + pp_context->private_context = &pp_context->pp_scaling_context; pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter; int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; @@ -2195,7 +2265,7 @@ pp_avs_y_steps(void *private_context) static int pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) { - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; float src_x_steping, src_y_steping, video_step_delta; @@ -2302,7 +2372,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context void *filter_param, int nlas) { - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct 
object_surface *obj_surface; @@ -2545,6 +2615,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context /* private function & data */ pp_context->pp_x_steps = pp_avs_x_steps; pp_context->pp_y_steps = pp_avs_y_steps; + pp_context->private_context = &pp_context->pp_avs_context; pp_context->pp_set_block_parameter = pp_avs_set_block_parameter; int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; @@ -2624,7 +2695,7 @@ gen7_pp_avs_y_steps(void *private_context) static int gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) { - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context; struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x; @@ -2642,11 +2713,11 @@ static void gen7_update_src_surface_uv_offset(VADriverContextP ctx, struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; int fourcc = pp_get_surface_fourcc(ctx, surface); - if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) { + if (fourcc == VA_FOURCC_YUY2) { pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0; pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1; pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3; - } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { + } else if (fourcc == VA_FOURCC_UYVY) { pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1; pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0; pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2; @@ -2661,7 +2732,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con const VARectangle *dst_rect, void *filter_param) { - struct 
pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; + struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context; struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct gen7_sampler_8x8 *sampler_8x8; @@ -2672,12 +2743,14 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con /* source surface */ gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, + src_rect, width, height, pitch, offset); src_width = width[0]; src_height = height[0]; /* destination surface */ gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, + dst_rect, width, height, pitch, offset); /* sampler 8x8 state */ @@ -2827,6 +2900,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con /* private function & data */ pp_context->pp_x_steps = gen7_pp_avs_x_steps; pp_context->pp_y_steps = gen7_pp_avs_y_steps; + pp_context->private_context = &pp_context->pp_avs_context; pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter; pp_avs_context->dest_x = dst_rect->x; @@ -2838,233 +2912,35 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con pp_avs_context->horiz_range = (float)src_rect->width / src_width; int dw = (pp_avs_context->src_w - 1) / 16 + 1; - dw = MAX(dw, pp_avs_context->dest_w); + dw = MAX(dw, dst_rect->width); pp_static_parameter->grf1.pointer_to_inline_parameter = 7; pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */ - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */ - - pp_static_parameter->grf2.avs_wa_width = dw; - pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw); - pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * 
dw); - - pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; - pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h; - pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - - (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; - pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width - - (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw; - - gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface); - - dst_surface->flags = src_surface->flags; - - return VA_STATUS_SUCCESS; -} - - -static VAStatus -gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context, - const struct i965_surface *src_surface, - const VARectangle *src_rect, - struct i965_surface *dst_surface, - const VARectangle *dst_rect, - void *filter_param) -{ - struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context; - struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - struct gen7_sampler_8x8 *sampler_8x8; - struct i965_sampler_8x8_state *sampler_8x8_state; - int index, i; - int width[3], height[3], pitch[3], offset[3]; - int src_width, src_height; - /* source surface */ - gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0, - width, height, pitch, offset); - src_width = width[0]; - src_height = height[0]; - - /* destination surface */ - gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1, - width, height, pitch, offset); - - /* sampler 8x8 state */ - dri_bo_map(pp_context->sampler_state_table.bo_8x8, True); - assert(pp_context->sampler_state_table.bo_8x8->virtual); - assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138); - sampler_8x8_state = 
pp_context->sampler_state_table.bo_8x8->virtual; - memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state)); - - /* The sampler_state setting of RGBX surface will be different with - * that for NV12/I420 surface. - */ - for (i = 0; i < 17; i++) { - float coff; - coff = i; - coff = coff / 16; - /* for Y channel, currently ignore */ - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0; - sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0; - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0; - sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0; - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0; - /* for U/V channel, 0.25 */ - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x00; - sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0); - 
sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x00; - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x00; - sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0); - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x00; - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0; - sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0; + if (pp_static_parameter->grf2.avs_wa_enable) { + int src_fourcc = pp_get_surface_fourcc(ctx, src_surface); + if ((src_fourcc == VA_FOURCC_RGBA) || + (src_fourcc == VA_FOURCC_RGBX) || + (src_fourcc == VA_FOURCC_BGRA) || + (src_fourcc == VA_FOURCC_BGRX)) { + pp_static_parameter->grf2.avs_wa_enable = 0; + } } - - sampler_8x8_state->dw136.default_sharpness_level = 0; - sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0; - sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1; - sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1; - dri_bo_unmap(pp_context->sampler_state_table.bo_8x8); - - /* sampler 8x8 */ - dri_bo_map(pp_context->sampler_state_table.bo, True); - assert(pp_context->sampler_state_table.bo->virtual); - assert(sizeof(*sampler_8x8) == sizeof(int) * 4); - sampler_8x8 = pp_context->sampler_state_table.bo->virtual; - - /* sample_8x8 Y index 4 */ - index = 4; - memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8)); - sampler_8x8[index].dw0.global_noise_estimation = 255; - sampler_8x8[index].dw0.ief_bypass = 1; - - sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5; - - 
sampler_8x8[index].dw2.weak_edge_threshold = 1; - sampler_8x8[index].dw2.strong_edge_threshold = 8; - sampler_8x8[index].dw2.r5x_coefficient = 9; - sampler_8x8[index].dw2.r5cx_coefficient = 8; - sampler_8x8[index].dw2.r5c_coefficient = 3; - - sampler_8x8[index].dw3.r3x_coefficient = 27; - sampler_8x8[index].dw3.r3c_coefficient = 5; - sampler_8x8[index].dw3.gain_factor = 40; - sampler_8x8[index].dw3.non_edge_weight = 1; - sampler_8x8[index].dw3.regular_weight = 2; - sampler_8x8[index].dw3.strong_edge_weight = 7; - sampler_8x8[index].dw3.ief4_smooth_enable = 0; - - dri_bo_emit_reloc(pp_context->sampler_state_table.bo, - I915_GEM_DOMAIN_RENDER, - 0, - 0, - sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), - pp_context->sampler_state_table.bo_8x8); - - /* sample_8x8 UV index 8 */ - index = 8; - memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8)); - sampler_8x8[index].dw0.disable_8x8_filter = 0; - sampler_8x8[index].dw0.global_noise_estimation = 255; - sampler_8x8[index].dw0.ief_bypass = 1; - sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5; - sampler_8x8[index].dw2.weak_edge_threshold = 1; - sampler_8x8[index].dw2.strong_edge_threshold = 8; - sampler_8x8[index].dw2.r5x_coefficient = 9; - sampler_8x8[index].dw2.r5cx_coefficient = 8; - sampler_8x8[index].dw2.r5c_coefficient = 3; - sampler_8x8[index].dw3.r3x_coefficient = 27; - sampler_8x8[index].dw3.r3c_coefficient = 5; - sampler_8x8[index].dw3.gain_factor = 40; - sampler_8x8[index].dw3.non_edge_weight = 1; - sampler_8x8[index].dw3.regular_weight = 2; - sampler_8x8[index].dw3.strong_edge_weight = 7; - sampler_8x8[index].dw3.ief4_smooth_enable = 0; - - dri_bo_emit_reloc(pp_context->sampler_state_table.bo, - I915_GEM_DOMAIN_RENDER, - 0, - 0, - sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), - pp_context->sampler_state_table.bo_8x8); - - /* sampler_8x8 V, index 12 */ - index = 12; - memset(&sampler_8x8[index], 0, 
sizeof(*sampler_8x8)); - sampler_8x8[index].dw0.disable_8x8_filter = 0; - sampler_8x8[index].dw0.global_noise_estimation = 255; - sampler_8x8[index].dw0.ief_bypass = 1; - sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5; - sampler_8x8[index].dw2.weak_edge_threshold = 1; - sampler_8x8[index].dw2.strong_edge_threshold = 8; - sampler_8x8[index].dw2.r5x_coefficient = 9; - sampler_8x8[index].dw2.r5cx_coefficient = 8; - sampler_8x8[index].dw2.r5c_coefficient = 3; - sampler_8x8[index].dw3.r3x_coefficient = 27; - sampler_8x8[index].dw3.r3c_coefficient = 5; - sampler_8x8[index].dw3.gain_factor = 40; - sampler_8x8[index].dw3.non_edge_weight = 1; - sampler_8x8[index].dw3.regular_weight = 2; - sampler_8x8[index].dw3.strong_edge_weight = 7; - sampler_8x8[index].dw3.ief4_smooth_enable = 0; - - dri_bo_emit_reloc(pp_context->sampler_state_table.bo, - I915_GEM_DOMAIN_RENDER, - 0, - 0, - sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), - pp_context->sampler_state_table.bo_8x8); - - dri_bo_unmap(pp_context->sampler_state_table.bo); - - /* private function & data */ - pp_context->pp_x_steps = gen7_pp_avs_x_steps; - pp_context->pp_y_steps = gen7_pp_avs_y_steps; - pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter; - - pp_avs_context->dest_x = dst_rect->x; - pp_avs_context->dest_y = dst_rect->y; - pp_avs_context->dest_w = ALIGN(dst_rect->width, 16); - pp_avs_context->dest_h = ALIGN(dst_rect->height, 16); - pp_avs_context->src_w = src_rect->width; - pp_avs_context->src_h = src_rect->height; - pp_avs_context->horiz_range = (float)src_rect->width / src_width; - - int dw = (pp_avs_context->src_w - 1) / 16 + 1; - dw = MAX(dw, pp_avs_context->dest_w); - - pp_static_parameter->grf1.pointer_to_inline_parameter = 7; - pp_static_parameter->grf2.avs_wa_enable = 0; /* It is unnecessary to use WA for RGBX surface */ - pp_static_parameter->grf2.avs_wa_width = dw; - 
pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw); - pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw); + + pp_static_parameter->grf2.avs_wa_width = src_width; + pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width); + pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width); + pp_static_parameter->grf2.alpha = 255; pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw; - pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h; + pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height; pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height - - (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; + (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step; pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width - - (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw; + (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw; + gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface); dst_surface->flags = src_surface->flags; @@ -3106,77 +2982,177 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex void *filter_param) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context; + struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context; struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - struct 
object_surface *obj_surface; + struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface; struct i965_sampler_dndi *sampler_dndi; int index; int w, h; int orig_w, orig_h; int dndi_top_first = 1; + VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; + int is_first_frame = (pp_dndi_context->frame_order == -1); - if (src_surface->flags == I965_SURFACE_FLAG_FRAME) - return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED; - - if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) - dndi_top_first = 1; - else + if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD) dndi_top_first = 0; + else + dndi_top_first = 1; /* surface */ - obj_surface = (struct object_surface *)src_surface->base; - orig_w = obj_surface->orig_width; - orig_h = obj_surface->orig_height; - w = obj_surface->width; - h = obj_surface->height; + current_in_obj_surface = (struct object_surface *)src_surface->base; + + if (di_filter_param->algorithm == VAProcDeinterlacingBob) { + previous_in_obj_surface = current_in_obj_surface; + is_first_frame = 1; + } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) { + if (pp_dndi_context->frame_order == 0) { + VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param; + if (!pipeline_param || + !pipeline_param->num_forward_references || + pipeline_param->forward_references[0] == VA_INVALID_ID) { + WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n"); + + return VA_STATUS_ERROR_INVALID_PARAMETER; + } else { + previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]); + assert(previous_in_obj_surface && previous_in_obj_surface->bo); - if (pp_context->stmm.bo == NULL) { - pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * h, - 4096); - assert(pp_context->stmm.bo); + is_first_frame = 0; + } + } else if 
(pp_dndi_context->frame_order == 1) { + vpp_surface_convert(ctx, + pp_dndi_context->current_out_obj_surface, + (struct object_surface *)dst_surface->base); + pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2; + is_first_frame = 0; + + return VA_STATUS_SUCCESS_1; + } else { + previous_in_obj_surface = current_in_obj_surface; + is_first_frame = 1; + } + } else { + return VA_STATUS_ERROR_UNIMPLEMENTED; } + /* source (temporal reference) YUV surface index 5 */ + orig_w = previous_in_obj_surface->orig_width; + orig_h = previous_in_obj_surface->orig_height; + w = previous_in_obj_surface->width; + h = previous_in_obj_surface->height; + i965_pp_set_surface2_state(ctx, pp_context, + previous_in_obj_surface->bo, 0, + orig_w, orig_h, w, + 0, h, + SURFACE_FORMAT_PLANAR_420_8, 1, + 5); + + /* source surface */ + orig_w = current_in_obj_surface->orig_width; + orig_h = current_in_obj_surface->orig_height; + w = current_in_obj_surface->width; + h = current_in_obj_surface->height; + /* source UV surface index 2 */ i965_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + current_in_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 2, 0); /* source YUV surface index 4 */ i965_pp_set_surface2_state(ctx, pp_context, - obj_surface->bo, 0, + current_in_obj_surface->bo, 0, orig_w, orig_h, w, 0, h, SURFACE_FORMAT_PLANAR_420_8, 1, 4); - /* source STMM surface index 20 */ + /* source STMM surface index 6 */ + if (pp_dndi_context->stmm_bo == NULL) { + pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dndi_context->stmm_bo); + } + i965_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dndi_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, - 20, 1); + 6, 0); - /* destination surface */ - obj_surface = (struct object_surface *)dst_surface->base; - orig_w = obj_surface->orig_width; - orig_h = obj_surface->orig_height; - w = 
obj_surface->width; - h = obj_surface->height; + /* destination (Previous frame) */ + previous_out_obj_surface = (struct object_surface *)dst_surface->base; + orig_w = previous_out_obj_surface->orig_width; + orig_h = previous_out_obj_surface->orig_height; + w = previous_out_obj_surface->width; + h = previous_out_obj_surface->height; - /* destination Y surface index 7 */ + if (is_first_frame) { + current_out_obj_surface = previous_out_obj_surface; + } else { + VAStatus va_status; + + if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) { + unsigned int tiling = 0, swizzle = 0; + dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle); + + va_status = i965_CreateSurfaces(ctx, + orig_w, + orig_h, + VA_RT_FORMAT_YUV420, + 1, + &pp_dndi_context->current_out_surface); + assert(va_status == VA_STATUS_SUCCESS); + pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface); + assert(pp_dndi_context->current_out_obj_surface); + i965_check_alloc_surface_bo(ctx, + pp_dndi_context->current_out_obj_surface, + tiling != I915_TILING_NONE, + VA_FOURCC_NV12, + SUBSAMPLE_YUV420); + } + + current_out_obj_surface = pp_dndi_context->current_out_obj_surface; + } + + /* destination (Previous frame) Y surface index 7 */ i965_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, 0, + previous_out_obj_surface->bo, 0, orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1); - /* destination UV surface index 8 */ + /* destination (Previous frame) UV surface index 8 */ i965_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + previous_out_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1); + + /* destination(Current frame) */ + orig_w = current_out_obj_surface->orig_width; + orig_h = current_out_obj_surface->orig_height; + w = current_out_obj_surface->width; + h = current_out_obj_surface->height; + + /* destination (Current frame) Y surface index xxx */ + i965_pp_set_surface_state(ctx, 
pp_context, + current_out_obj_surface->bo, 0, + orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, + 10, 1); + + /* destination (Current frame) UV surface index xxx */ + i965_pp_set_surface_state(ctx, pp_context, + current_out_obj_surface->bo, w * h, + orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, + 11, 1); + + /* STMM output surface, index 20 */ + i965_pp_set_surface_state(ctx, pp_context, + pp_dndi_context->stmm_bo, 0, + orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, + 20, 1); + /* sampler dndi */ dri_bo_map(pp_context->sampler_state_table.bo, True); assert(pp_context->sampler_state_table.bo->virtual); @@ -3185,61 +3161,62 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex /* sample dndi index 1 */ index = 0; - sampler_dndi[index].dw0.denoise_asd_threshold = 0; - sampler_dndi[index].dw0.denoise_history_delta = 8; // 0-15, default is 8 - sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240 - sampler_dndi[index].dw0.denoise_stad_threshold = 0; + sampler_dndi[index].dw0.denoise_asd_threshold = 38; + sampler_dndi[index].dw0.denoise_history_delta = 7; // 0-15, default is 8 + sampler_dndi[index].dw0.denoise_maximum_history = 192; // 128-240 + sampler_dndi[index].dw0.denoise_stad_threshold = 140; - sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64; - sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4; + sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38; + sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1; sampler_dndi[index].dw1.stmm_c2 = 1; - sampler_dndi[index].dw1.low_temporal_difference_threshold = 8; - sampler_dndi[index].dw1.temporal_difference_threshold = 16; + sampler_dndi[index].dw1.low_temporal_difference_threshold = 0; + sampler_dndi[index].dw1.temporal_difference_threshold = 0; - sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31 - sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7; // 0-15 + 
sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20; // 0-31 + sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1; // 0-15 sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15 - sampler_dndi[index].dw2.good_neighbor_threshold = 4; // 0-63 + sampler_dndi[index].dw2.good_neighbor_threshold = 12; // 0-63 - sampler_dndi[index].dw3.maximum_stmm = 128; - sampler_dndi[index].dw3.multipler_for_vecm = 2; - sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0; + sampler_dndi[index].dw3.maximum_stmm = 150; + sampler_dndi[index].dw3.multipler_for_vecm = 30; + sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125; sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64; sampler_dndi[index].dw3.stmm_blending_constant_select = 0; - sampler_dndi[index].dw4.sdi_delta = 8; - sampler_dndi[index].dw4.sdi_threshold = 128; - sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift - sampler_dndi[index].dw4.stmm_shift_up = 0; + sampler_dndi[index].dw4.sdi_delta = 5; + sampler_dndi[index].dw4.sdi_threshold = 100; + sampler_dndi[index].dw4.stmm_output_shift = 5; // stmm_max - stmm_min = 2 ^ stmm_output_shift + sampler_dndi[index].dw4.stmm_shift_up = 1; sampler_dndi[index].dw4.stmm_shift_down = 0; - sampler_dndi[index].dw4.minimum_stmm = 0; + sampler_dndi[index].dw4.minimum_stmm = 118; - sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8; - sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32; - sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64; - sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32; + sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175; + sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50; sampler_dndi[index].dw6.dn_enable = 1; 
sampler_dndi[index].dw6.di_enable = 1; sampler_dndi[index].dw6.di_partial = 0; sampler_dndi[index].dw6.dndi_top_first = dndi_top_first; sampler_dndi[index].dw6.dndi_stream_id = 0; - sampler_dndi[index].dw6.dndi_first_frame = 1; + sampler_dndi[index].dw6.dndi_first_frame = is_first_frame; sampler_dndi[index].dw6.progressive_dn = 0; - sampler_dndi[index].dw6.fmd_tear_threshold = 63; - sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32; - sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32; + sampler_dndi[index].dw6.fmd_tear_threshold = 2; + sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100; + sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16; sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0; sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0; sampler_dndi[index].dw7.vdi_walker_enable = 0; - sampler_dndi[index].dw7.column_width_minus1 = 0; + sampler_dndi[index].dw7.column_width_minus1 = w / 16; dri_bo_unmap(pp_context->sampler_state_table.bo); /* private function & data */ pp_context->pp_x_steps = pp_dndi_x_steps; pp_context->pp_y_steps = pp_dndi_y_steps; + pp_context->private_context = &pp_context->pp_dndi_context; pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter; pp_static_parameter->grf1.statistics_surface_picth = w / 2; @@ -3257,6 +3234,8 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex dst_surface->flags = I965_SURFACE_FLAG_FRAME; + pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2; + return VA_STATUS_SUCCESS; } @@ -3294,7 +3273,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context void *filter_param) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context; + struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context; struct object_surface *obj_surface; struct 
i965_sampler_dndi *sampler_dndi; struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; @@ -3337,12 +3316,12 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context w = obj_surface->width; h = obj_surface->height; - if (pp_context->stmm.bo == NULL) { - pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * h, - 4096); - assert(pp_context->stmm.bo); + if (pp_dn_context->stmm_bo == NULL) { + pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dn_context->stmm_bo); } /* source UV surface index 2 */ @@ -3361,7 +3340,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context /* source STMM surface index 20 */ i965_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dn_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 20, 1); @@ -3446,6 +3425,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context /* private function & data */ pp_context->pp_x_steps = pp_dn_x_steps; pp_context->pp_y_steps = pp_dn_y_steps; + pp_context->private_context = &pp_context->pp_dn_context; pp_context->pp_set_block_parameter = pp_dn_set_block_parameter; pp_static_parameter->grf1.statistics_surface_picth = w / 2; @@ -3502,100 +3482,172 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c void *filter_param) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context; + struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context; struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; - struct object_surface *obj_surface; + struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface; struct gen7_sampler_dndi *sampler_dndi; 
int index; int w, h; int orig_w, orig_h; int dndi_top_first = 1; + VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param; + int is_first_frame = (pp_dndi_context->frame_order == -1); - if (src_surface->flags == I965_SURFACE_FLAG_FRAME) - return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED; - - if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) - dndi_top_first = 1; - else + if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD) dndi_top_first = 0; + else + dndi_top_first = 1; /* surface */ - obj_surface = (struct object_surface *)src_surface->base; - orig_w = obj_surface->orig_width; - orig_h = obj_surface->orig_height; - w = obj_surface->width; - h = obj_surface->height; + current_in_obj_surface = (struct object_surface *)src_surface->base; + + if (di_filter_param->algorithm == VAProcDeinterlacingBob) { + previous_in_obj_surface = current_in_obj_surface; + is_first_frame = 1; + } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) { + if (pp_dndi_context->frame_order == 0) { + VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param; + if (!pipeline_param || + !pipeline_param->num_forward_references || + pipeline_param->forward_references[0] == VA_INVALID_ID) { + WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n"); + + return VA_STATUS_ERROR_INVALID_PARAMETER; + } else { + previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]); + assert(previous_in_obj_surface && previous_in_obj_surface->bo); - if (pp_context->stmm.bo == NULL) { - pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * h, - 4096); - assert(pp_context->stmm.bo); + is_first_frame = 0; + } + } else if (pp_dndi_context->frame_order == 1) { + vpp_surface_convert(ctx, + pp_dndi_context->current_out_obj_surface, + (struct object_surface *)dst_surface->base); + pp_dndi_context->frame_order = 
(pp_dndi_context->frame_order + 1) % 2; + is_first_frame = 0; + + return VA_STATUS_SUCCESS_1; + } else { + previous_in_obj_surface = current_in_obj_surface; + is_first_frame = 1; + } + } else { + return VA_STATUS_ERROR_UNIMPLEMENTED; } + /* source (temporal reference) YUV surface index 4 */ + orig_w = previous_in_obj_surface->orig_width; + orig_h = previous_in_obj_surface->orig_height; + w = previous_in_obj_surface->width; + h = previous_in_obj_surface->height; + gen7_pp_set_surface2_state(ctx, pp_context, + previous_in_obj_surface->bo, 0, + orig_w, orig_h, w, + 0, h, + SURFACE_FORMAT_PLANAR_420_8, 1, + 4); + + /* source surface */ + orig_w = current_in_obj_surface->orig_width; + orig_h = current_in_obj_surface->orig_height; + w = current_in_obj_surface->width; + h = current_in_obj_surface->height; + /* source UV surface index 1 */ gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + current_in_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 1, 0); /* source YUV surface index 3 */ gen7_pp_set_surface2_state(ctx, pp_context, - obj_surface->bo, 0, + current_in_obj_surface->bo, 0, orig_w, orig_h, w, 0, h, SURFACE_FORMAT_PLANAR_420_8, 1, 3); - /* source (temporal reference) YUV surface index 4 */ - gen7_pp_set_surface2_state(ctx, pp_context, - obj_surface->bo, 0, - orig_w, orig_h, w, - 0, h, - SURFACE_FORMAT_PLANAR_420_8, 1, - 4); - /* STMM / History Statistics input surface, index 5 */ + if (pp_dndi_context->stmm_bo == NULL) { + pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dndi_context->stmm_bo); + } + gen7_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dndi_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 5, 1); /* destination surface */ - obj_surface = (struct object_surface *)dst_surface->base; - orig_w = obj_surface->orig_width; - orig_h = obj_surface->orig_height; - w = obj_surface->width; - h = 
obj_surface->height; + previous_out_obj_surface = (struct object_surface *)dst_surface->base; + orig_w = previous_out_obj_surface->orig_width; + orig_h = previous_out_obj_surface->orig_height; + w = previous_out_obj_surface->width; + h = previous_out_obj_surface->height; + + if (is_first_frame) { + current_out_obj_surface = previous_out_obj_surface; + } else { + VAStatus va_status; + + if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) { + unsigned int tiling = 0, swizzle = 0; + dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle); + + va_status = i965_CreateSurfaces(ctx, + orig_w, + orig_h, + VA_RT_FORMAT_YUV420, + 1, + &pp_dndi_context->current_out_surface); + assert(va_status == VA_STATUS_SUCCESS); + pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface); + assert(pp_dndi_context->current_out_obj_surface); + i965_check_alloc_surface_bo(ctx, + pp_dndi_context->current_out_obj_surface, + tiling != I915_TILING_NONE, + VA_FOURCC_NV12, + SUBSAMPLE_YUV420); + } + + current_out_obj_surface = pp_dndi_context->current_out_obj_surface; + } /* destination(Previous frame) Y surface index 27 */ gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, 0, + previous_out_obj_surface->bo, 0, orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1); /* destination(Previous frame) UV surface index 28 */ gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + previous_out_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1); /* destination(Current frame) Y surface index 30 */ gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, 0, + current_out_obj_surface->bo, 0, orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1); /* destination(Current frame) UV surface index 31 */ + orig_w = current_out_obj_surface->orig_width; + orig_h = current_out_obj_surface->orig_height; + w = current_out_obj_surface->width; + h = current_out_obj_surface->height; + 
gen7_pp_set_surface_state(ctx, pp_context, - obj_surface->bo, w * h, + current_out_obj_surface->bo, w * h, orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1); /* STMM output surface, index 33 */ gen7_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dndi_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 33, 1); @@ -3608,55 +3660,55 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c /* sample dndi index 0 */ index = 0; - sampler_dndi[index].dw0.denoise_asd_threshold = 0; - sampler_dndi[index].dw0.dnmh_delt = 8; + sampler_dndi[index].dw0.denoise_asd_threshold = 38; + sampler_dndi[index].dw0.dnmh_delt = 7; sampler_dndi[index].dw0.vdi_walker_y_stride = 0; sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0; - sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240 - sampler_dndi[index].dw0.denoise_stad_threshold = 0; + sampler_dndi[index].dw0.denoise_maximum_history = 192; // 128-240 + sampler_dndi[index].dw0.denoise_stad_threshold = 140; - sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64; - sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0; - sampler_dndi[index].dw1.stmm_c2 = 0; - sampler_dndi[index].dw1.low_temporal_difference_threshold = 8; - sampler_dndi[index].dw1.temporal_difference_threshold = 16; + sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38; + sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1; + sampler_dndi[index].dw1.stmm_c2 = 2; + sampler_dndi[index].dw1.low_temporal_difference_threshold = 0; + sampler_dndi[index].dw1.temporal_difference_threshold = 0; - sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31 + sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20; // 0-31 sampler_dndi[index].dw2.bne_edge_th = 1; sampler_dndi[index].dw2.smooth_mv_th = 0; sampler_dndi[index].dw2.sad_tight_th = 5; sampler_dndi[index].dw2.cat_slope_minus1 = 9; - 
sampler_dndi[index].dw2.good_neighbor_th = 4; + sampler_dndi[index].dw2.good_neighbor_th = 12; - sampler_dndi[index].dw3.maximum_stmm = 128; - sampler_dndi[index].dw3.multipler_for_vecm = 2; - sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0; + sampler_dndi[index].dw3.maximum_stmm = 150; + sampler_dndi[index].dw3.multipler_for_vecm = 30; + sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125; sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64; sampler_dndi[index].dw3.stmm_blending_constant_select = 0; - sampler_dndi[index].dw4.sdi_delta = 8; - sampler_dndi[index].dw4.sdi_threshold = 128; - sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift - sampler_dndi[index].dw4.stmm_shift_up = 0; + sampler_dndi[index].dw4.sdi_delta = 5; + sampler_dndi[index].dw4.sdi_threshold = 100; + sampler_dndi[index].dw4.stmm_output_shift = 5; // stmm_max - stmm_min = 2 ^ stmm_output_shift + sampler_dndi[index].dw4.stmm_shift_up = 1; sampler_dndi[index].dw4.stmm_shift_down = 0; - sampler_dndi[index].dw4.minimum_stmm = 0; + sampler_dndi[index].dw4.minimum_stmm = 118; - sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0; - sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0; - sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0; - sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0; + sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175; + sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100; + sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50; sampler_dndi[index].dw6.dn_enable = 0; sampler_dndi[index].dw6.di_enable = 1; sampler_dndi[index].dw6.di_partial = 0; sampler_dndi[index].dw6.dndi_top_first = dndi_top_first; sampler_dndi[index].dw6.dndi_stream_id = 1; - sampler_dndi[index].dw6.dndi_first_frame = 1; + 
sampler_dndi[index].dw6.dndi_first_frame = is_first_frame; sampler_dndi[index].dw6.progressive_dn = 0; sampler_dndi[index].dw6.mcdi_enable = 0; - sampler_dndi[index].dw6.fmd_tear_threshold = 32; + sampler_dndi[index].dw6.fmd_tear_threshold = 2; sampler_dndi[index].dw6.cat_th1 = 0; - sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32; - sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32; + sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100; + sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16; sampler_dndi[index].dw7.sad_tha = 5; sampler_dndi[index].dw7.sad_thb = 10; @@ -3672,6 +3724,7 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c /* private function & data */ pp_context->pp_x_steps = gen7_pp_dndi_x_steps; pp_context->pp_y_steps = gen7_pp_dndi_y_steps; + pp_context->private_context = &pp_context->pp_dndi_context; pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter; pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2; @@ -3691,6 +3744,8 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c dst_surface->flags = I965_SURFACE_FLAG_FRAME; + pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2; + return VA_STATUS_SUCCESS; } @@ -3730,7 +3785,7 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con void *filter_param) { struct i965_driver_data *i965 = i965_driver_data(ctx); - struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context; + struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context; struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter; struct object_surface *obj_surface; struct gen7_sampler_dndi *sampler_dn; @@ -3772,12 +3827,12 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con w = obj_surface->width; h = obj_surface->height; - if 
(pp_context->stmm.bo == NULL) { - pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr, - "STMM surface", - w * h, - 4096); - assert(pp_context->stmm.bo); + if (pp_dn_context->stmm_bo == NULL) { + pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr, + "STMM surface", + w * h, + 4096); + assert(pp_dn_context->stmm_bo); } /* source UV surface index 1 */ @@ -3804,7 +3859,7 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con /* STMM / History Statistics input surface, index 5 */ gen7_pp_set_surface_state(ctx, pp_context, - pp_context->stmm.bo, 0, + pp_dn_context->stmm_bo, 0, orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 33, 1); @@ -3900,6 +3955,7 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con /* private function & data */ pp_context->pp_x_steps = gen7_pp_dn_x_steps; pp_context->pp_y_steps = gen7_pp_dn_y_steps; + pp_context->private_context = &pp_context->pp_dn_context; pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter; pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2; @@ -4131,7 +4187,7 @@ gen6_pp_initialize( assert(bo); pp_context->vfe_state.bo = bo; - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info)) { static_param_size = sizeof(struct gen7_pp_static_parameter); inline_param_size = sizeof(struct gen7_pp_inline_parameter); } else { @@ -4161,6 +4217,7 @@ gen6_pp_initialize( return va_status; } + static void gen6_pp_interface_descriptor_table(VADriverContextP ctx, struct i965_post_processing_context *pp_context) @@ -4186,7 +4243,7 @@ gen6_pp_interface_descriptor_table(VADriverContextP ctx, desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5); desc->desc4.constant_urb_entry_read_offset = 0; - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_info)) desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */ else desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */ @@ -4218,7 +4275,7 @@ 
gen6_pp_upload_constants(VADriverContextP ctx, assert(sizeof(struct pp_static_parameter) == 128); assert(sizeof(struct gen7_pp_static_parameter) == 192); - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_static_parameter); else param_size = sizeof(struct pp_static_parameter); @@ -4279,12 +4336,14 @@ gen6_pp_vfe_state(VADriverContextP ctx, OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2)); OUT_BATCH(batch, 0); OUT_BATCH(batch, - (pp_context->urb.num_vfe_entries - 1) << 16 | - pp_context->urb.num_vfe_entries << 8); + (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 | + pp_context->vfe_gpu_state.num_urb_entries << 8); OUT_BATCH(batch, 0); OUT_BATCH(batch, - (pp_context->urb.size_vfe_entry * 2) << 16 | /* URB Entry Allocation Size, in 256 bits unit */ - (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */ + (pp_context->vfe_gpu_state.urb_entry_size) << 16 | + /* URB Entry Allocation Size, in 256 bits unit */ + (pp_context->vfe_gpu_state.curbe_allocation_size)); + /* CURBE Allocation Size, in 256 bits unit */ OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -4296,14 +4355,19 @@ gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context) { struct intel_batchbuffer *batch = pp_context->batch; + struct i965_driver_data *i965 = i965_driver_data(ctx); + int param_size; - assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size); + if (IS_GEN7(i965->intel.device_info)) + param_size = sizeof(struct gen7_pp_static_parameter); + else + param_size = sizeof(struct pp_static_parameter); BEGIN_BATCH(batch, 4); OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2)); OUT_BATCH(batch, 0); OUT_BATCH(batch, - pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32); + param_size); OUT_RELOC(batch, pp_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, @@ 
-4380,13 +4444,13 @@ gen6_pp_object_walker(VADriverContextP ctx, dri_bo *command_buffer; unsigned int *command_ptr; - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_info)) param_size = sizeof(struct gen7_pp_inline_parameter); else param_size = sizeof(struct pp_inline_parameter); - x_steps = pp_context->pp_x_steps(&pp_context->private_context); - y_steps = pp_context->pp_y_steps(&pp_context->private_context); + x_steps = pp_context->pp_x_steps(pp_context->private_context); + y_steps = pp_context->pp_y_steps(pp_context->private_context); command_length_in_dws = 6 + (param_size >> 2); command_buffer = dri_bo_alloc(i965->intel.bufmgr, "command objects buffer", @@ -4400,7 +4464,7 @@ gen6_pp_object_walker(VADriverContextP ctx, for (x = 0; x < x_steps; x++) { if (!pp_context->pp_set_block_parameter(pp_context, x, y)) { // some common block parameter update goes here, apply to all pp functions - if (IS_GEN6(i965->intel.device_id)) + if (IS_GEN6(i965->intel.device_info)) update_block_mask_parameter (pp_context, x, y, x_steps, y_steps); *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2)); @@ -4423,12 +4487,12 @@ gen6_pp_object_walker(VADriverContextP ctx, dri_bo_unmap(command_buffer); BEGIN_BATCH(batch, 2); - OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); - OUT_RELOC(batch, command_buffer, - I915_GEM_DOMAIN_COMMAND, 0, + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8)); + OUT_RELOC(batch, command_buffer, + I915_GEM_DOMAIN_COMMAND, 0, 0); ADVANCE_BATCH(batch); - + dri_bo_unreference(command_buffer); /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END @@ -4483,6 +4547,9 @@ gen6_post_processing( gen6_pp_pipeline_setup(ctx, pp_context); } + if (va_status == VA_STATUS_SUCCESS_1) + va_status = VA_STATUS_SUCCESS; + return va_status; } @@ -4501,27 +4568,18 @@ i965_post_processing_internal( VAStatus va_status; struct i965_driver_data *i965 = i965_driver_data(ctx); - if (IS_GEN6(i965->intel.device_id) || - 
IS_GEN7(i965->intel.device_id)) - va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); - else - va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param); - + if (pp_context && pp_context->intel_post_processing) { + va_status = (pp_context->intel_post_processing)(ctx, pp_context, + src_surface, src_rect, + dst_surface, dst_rect, + pp_index, filter_param); + } else { + va_status = VA_STATUS_ERROR_UNIMPLEMENTED; + } + return va_status; } -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); -VAStatus -i965_CreateSurfaces(VADriverContextP ctx, - int width, - int height, - int format, - int num_surfaces, - VASurfaceID *surfaces); - static void rgb_to_yuv(unsigned int argb, unsigned char *y, @@ -4554,7 +4612,7 @@ i965_vpp_clear_surface(VADriverContextP ctx, int region_width, region_height; /* Currently only support NV12 surface */ - if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) + if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12) return; rgb_to_yuv(color, &y, &u, &v, &a); @@ -4576,13 +4634,13 @@ i965_vpp_clear_surface(VADriverContextP ctx, br13 |= BR13_8; br13 |= pitch; - if (IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) { - intel_batchbuffer_start_atomic_blt(batch, 48); - BEGIN_BLT_BATCH(batch, 12); - } else { + if (IS_IRONLAKE(i965->intel.device_info)) { intel_batchbuffer_start_atomic(batch, 48); BEGIN_BATCH(batch, 12); + } else { + /* Will double-check the command if the new chipset is added */ + intel_batchbuffer_start_atomic_blt(batch, 48); + BEGIN_BLT_BATCH(batch, 12); } region_width = obj_surface->width; @@ -4641,10 +4699,10 @@ i965_scaling_processing( VAStatus va_status = VA_STATUS_SUCCESS; struct i965_driver_data *i965 = i965_driver_data(ctx); - assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2')); - 
assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2')); + assert(src_surface_obj->fourcc == VA_FOURCC_NV12); + assert(dst_surface_obj->fourcc == VA_FOURCC_NV12); - if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) { + if (HAS_VPP(i965) && (flags & I965_PP_FLAG_AVS)) { struct i965_surface src_surface; struct i965_surface dst_surface; @@ -4687,13 +4745,13 @@ i965_post_processing( *has_done_scaling = 0; - if (HAS_PP(i965)) { + if (HAS_VPP(i965)) { VAStatus status; struct i965_surface src_surface; struct i965_surface dst_surface; /* Currently only support post processing for NV12 surface */ - if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) + if (obj_surface->fourcc != VA_FOURCC_NV12) return out_surface_id; _i965LockMutex(&i965->pp_mutex); @@ -4713,7 +4771,7 @@ i965_post_processing( assert(status == VA_STATUS_SUCCESS); obj_surface = SURFACE(out_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420); i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); dst_surface.base = (struct object_base *)obj_surface; @@ -4749,7 +4807,7 @@ i965_post_processing( assert(status == VA_STATUS_SUCCESS); obj_surface = SURFACE(out_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420); i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); dst_surface.base = (struct object_base *)obj_surface; @@ -4777,6 +4835,70 @@ i965_post_processing( } static VAStatus +i965_image_pl2_processing(VADriverContextP ctx, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect); + +static VAStatus +i965_image_plx_nv12_plx_processing(VADriverContextP ctx, + VAStatus 
(*i965_image_plx_nv12_processing)( + VADriverContextP, + const struct i965_surface *, + const VARectangle *, + struct i965_surface *, + const VARectangle *), + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + VAStatus status; + VASurfaceID tmp_surface_id = VA_INVALID_SURFACE; + struct object_surface *obj_surface = NULL; + struct i965_surface tmp_surface; + int width, height; + + pp_get_surface_size(ctx, dst_surface, &width, &height); + status = i965_CreateSurfaces(ctx, + width, + height, + VA_RT_FORMAT_YUV420, + 1, + &tmp_surface_id); + assert(status == VA_STATUS_SUCCESS); + obj_surface = SURFACE(tmp_surface_id); + assert(obj_surface); + i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420); + + tmp_surface.base = (struct object_base *)obj_surface; + tmp_surface.type = I965_SURFACE_TYPE_SURFACE; + tmp_surface.flags = I965_SURFACE_FLAG_FRAME; + + status = i965_image_plx_nv12_processing(ctx, + src_surface, + src_rect, + &tmp_surface, + dst_rect); + + if (status == VA_STATUS_SUCCESS) + status = i965_image_pl2_processing(ctx, + &tmp_surface, + dst_rect, + dst_surface, + dst_rect); + + i965_DestroySurfaces(ctx, + &tmp_surface_id, + 1); + + return status; +} + + +static VAStatus i965_image_pl1_rgbx_processing(VADriverContextP ctx, const struct i965_surface *src_surface, const VARectangle *src_rect, @@ -4786,23 +4908,31 @@ i965_image_pl1_rgbx_processing(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_post_processing_context *pp_context = i965->pp_context; int fourcc = pp_get_surface_fourcc(ctx, dst_surface); + VAStatus vaStatus; - if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { - i965_post_processing_internal(ctx, i965->pp_context, - src_surface, - src_rect, - dst_surface, - dst_rect, - PP_RGBX_LOAD_SAVE_NV12, - NULL); - } else { - assert(0); - return 
VA_STATUS_ERROR_UNKNOWN; - } + switch (fourcc) { + case VA_FOURCC_NV12: + vaStatus = i965_post_processing_internal(ctx, i965->pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + PP_RGBX_LOAD_SAVE_NV12, + NULL); + intel_batchbuffer_flush(pp_context->batch); + break; - intel_batchbuffer_flush(pp_context->batch); + default: + vaStatus = i965_image_plx_nv12_plx_processing(ctx, + i965_image_pl1_rgbx_processing, + src_surface, + src_rect, + dst_surface, + dst_rect); + break; + } - return VA_STATUS_SUCCESS; + return vaStatus; } static VAStatus @@ -4817,7 +4947,8 @@ i965_image_pl3_processing(VADriverContextP ctx, int fourcc = pp_get_surface_fourcc(ctx, dst_surface); VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED; - if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { + switch (fourcc) { + case VA_FOURCC_NV12: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4825,10 +4956,13 @@ i965_image_pl3_processing(VADriverContextP ctx, dst_rect, PP_PL3_LOAD_SAVE_N12, NULL); - } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || - fourcc == VA_FOURCC('I', 'M', 'C', '3') || - fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('I', '4', '2', '0')) { + intel_batchbuffer_flush(pp_context->batch); + break; + + case VA_FOURCC_IMC1: + case VA_FOURCC_IMC3: + case VA_FOURCC_YV12: + case VA_FOURCC_I420: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4836,8 +4970,11 @@ i965_image_pl3_processing(VADriverContextP ctx, dst_rect, PP_PL3_LOAD_SAVE_PL3, NULL); - } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || - fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { + intel_batchbuffer_flush(pp_context->batch); + break; + + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4845,12 +4982,18 @@ i965_image_pl3_processing(VADriverContextP ctx, dst_rect, PP_PL3_LOAD_SAVE_PA, NULL); - } - else { - assert(0); - } + 
intel_batchbuffer_flush(pp_context->batch); + break; - intel_batchbuffer_flush(pp_context->batch); + default: + vaStatus = i965_image_plx_nv12_plx_processing(ctx, + i965_image_pl3_processing, + src_surface, + src_rect, + dst_surface, + dst_rect); + break; + } return vaStatus; } @@ -4867,7 +5010,8 @@ i965_image_pl2_processing(VADriverContextP ctx, int fourcc = pp_get_surface_fourcc(ctx, dst_surface); VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED; - if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { + switch (fourcc) { + case VA_FOURCC_NV12: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4875,10 +5019,12 @@ i965_image_pl2_processing(VADriverContextP ctx, dst_rect, PP_NV12_LOAD_SAVE_N12, NULL); - } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || - fourcc == VA_FOURCC('I', 'M', 'C', '3') || - fourcc == VA_FOURCC('Y', 'V', '1', '2') || - fourcc == VA_FOURCC('I', '4', '2', '0') ) { + break; + + case VA_FOURCC_IMC1: + case VA_FOURCC_IMC3: + case VA_FOURCC_YV12: + case VA_FOURCC_I420: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4886,19 +5032,23 @@ i965_image_pl2_processing(VADriverContextP ctx, dst_rect, PP_NV12_LOAD_SAVE_PL3, NULL); - } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || - fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { + break; + + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, dst_surface, dst_rect, PP_NV12_LOAD_SAVE_PA, - NULL); - } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || - fourcc == VA_FOURCC('B', 'G', 'R', 'A') || - fourcc == VA_FOURCC('R', 'G', 'B', 'X') || - fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) { + NULL); + break; + + case VA_FOURCC_BGRX: + case VA_FOURCC_BGRA: + case VA_FOURCC_RGBX: + case VA_FOURCC_RGBA: vaStatus = i965_post_processing_internal(ctx, i965->pp_context, src_surface, src_rect, @@ -4906,9 +5056,10 @@ i965_image_pl2_processing(VADriverContextP ctx, 
dst_rect, PP_NV12_LOAD_SAVE_RGBX, NULL); - } else { - assert(0); - return VA_STATUS_ERROR_UNKNOWN; + break; + + default: + return VA_STATUS_ERROR_UNIMPLEMENTED; } intel_batchbuffer_flush(pp_context->batch); @@ -4926,31 +5077,54 @@ i965_image_pl1_processing(VADriverContextP ctx, struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_post_processing_context *pp_context = i965->pp_context; int fourcc = pp_get_surface_fourcc(ctx, dst_surface); + VAStatus vaStatus; - if (fourcc == VA_FOURCC('N', 'V', '1', '2')) { - i965_post_processing_internal(ctx, i965->pp_context, - src_surface, - src_rect, - dst_surface, - dst_rect, - PP_PA_LOAD_SAVE_NV12, - NULL); - } else if (fourcc == VA_FOURCC_YV12) { - i965_post_processing_internal(ctx, i965->pp_context, - src_surface, - src_rect, - dst_surface, - dst_rect, - PP_PA_LOAD_SAVE_PL3, - NULL); + switch (fourcc) { + case VA_FOURCC_NV12: + vaStatus = i965_post_processing_internal(ctx, i965->pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + PP_PA_LOAD_SAVE_NV12, + NULL); + intel_batchbuffer_flush(pp_context->batch); + break; - } else { - return VA_STATUS_ERROR_UNKNOWN; - } + case VA_FOURCC_YV12: + vaStatus = i965_post_processing_internal(ctx, i965->pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + PP_PA_LOAD_SAVE_PL3, + NULL); + intel_batchbuffer_flush(pp_context->batch); + break; - intel_batchbuffer_flush(pp_context->batch); + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: + vaStatus = i965_post_processing_internal(ctx, i965->pp_context, + src_surface, + src_rect, + dst_surface, + dst_rect, + PP_PA_LOAD_SAVE_PA, + NULL); + intel_batchbuffer_flush(pp_context->batch); + break; - return VA_STATUS_SUCCESS; + default: + vaStatus = i965_image_plx_nv12_plx_processing(ctx, + i965_image_pl1_processing, + src_surface, + src_rect, + dst_surface, + dst_rect); + break; + } + + return vaStatus; } VAStatus @@ -4963,16 +5137,21 @@ i965_image_processing(VADriverContextP ctx, struct i965_driver_data *i965 = 
i965_driver_data(ctx); VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED; - if (HAS_PP(i965)) { + if (HAS_VPP(i965)) { int fourcc = pp_get_surface_fourcc(ctx, src_surface); _i965LockMutex(&i965->pp_mutex); switch (fourcc) { - case VA_FOURCC('Y', 'V', '1', '2'): - case VA_FOURCC('I', '4', '2', '0'): - case VA_FOURCC('I', 'M', 'C', '1'): - case VA_FOURCC('I', 'M', 'C', '3'): + case VA_FOURCC_YV12: + case VA_FOURCC_I420: + case VA_FOURCC_IMC1: + case VA_FOURCC_IMC3: + case VA_FOURCC_422H: + case VA_FOURCC_422V: + case VA_FOURCC_411P: + case VA_FOURCC_444P: + case VA_FOURCC_YV16: status = i965_image_pl3_processing(ctx, src_surface, src_rect, @@ -4980,25 +5159,25 @@ i965_image_processing(VADriverContextP ctx, dst_rect); break; - case VA_FOURCC('N', 'V', '1', '2'): + case VA_FOURCC_NV12: status = i965_image_pl2_processing(ctx, src_surface, src_rect, dst_surface, dst_rect); break; - case VA_FOURCC('Y', 'U', 'Y', '2'): - case VA_FOURCC('U', 'Y', 'V', 'Y'): + case VA_FOURCC_YUY2: + case VA_FOURCC_UYVY: status = i965_image_pl1_processing(ctx, src_surface, src_rect, dst_surface, dst_rect); break; - case VA_FOURCC('B', 'G', 'R', 'A'): - case VA_FOURCC('B', 'G', 'R', 'X'): - case VA_FOURCC('R', 'G', 'B', 'A'): - case VA_FOURCC('R', 'G', 'B', 'X'): + case VA_FOURCC_BGRA: + case VA_FOURCC_BGRX: + case VA_FOURCC_RGBA: + case VA_FOURCC_RGBX: status = i965_image_pl1_rgbx_processing(ctx, src_surface, src_rect, @@ -5043,8 +5222,11 @@ i965_post_processing_context_finalize(struct i965_post_processing_context *pp_co dri_bo_unreference(pp_context->vfe_state.bo); pp_context->vfe_state.bo = NULL; - dri_bo_unreference(pp_context->stmm.bo); - pp_context->stmm.bo = NULL; + dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo); + pp_context->pp_dndi_context.stmm_bo = NULL; + + dri_bo_unreference(pp_context->pp_dn_context.stmm_bo); + pp_context->pp_dn_context.stmm_bo = NULL; for (i = 0; i < NUM_PP_MODULES; i++) { struct pp_module *pp_module = &pp_context->pp_modules[i]; @@ -5066,49 +5248,59 @@ 
i965_post_processing_terminate(VADriverContextP ctx) struct i965_post_processing_context *pp_context = i965->pp_context; if (pp_context) { - i965_post_processing_context_finalize(pp_context); + pp_context->finalize(pp_context); free(pp_context); } i965->pp_context = NULL; } -static void +#define VPP_CURBE_ALLOCATION_SIZE 32 + +void i965_post_processing_context_init(VADriverContextP ctx, - struct i965_post_processing_context *pp_context, + void *data, struct intel_batchbuffer *batch) { struct i965_driver_data *i965 = i965_driver_data(ctx); int i; + struct i965_post_processing_context *pp_context = data; - pp_context->urb.size = URB_SIZE((&i965->intel)); - pp_context->urb.num_vfe_entries = 32; - pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */ - pp_context->urb.num_cs_entries = 1; - - if (IS_GEN7(i965->intel.device_id)) - pp_context->urb.size_cs_entry = 4; /* in 512 bits unit */ - else + if (IS_IRONLAKE(i965->intel.device_info)) { + pp_context->urb.size = i965->intel.device_info->urb_size; + pp_context->urb.num_vfe_entries = 32; + pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */ + pp_context->urb.num_cs_entries = 1; pp_context->urb.size_cs_entry = 2; + pp_context->urb.vfe_start = 0; + pp_context->urb.cs_start = pp_context->urb.vfe_start + + pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry; + assert(pp_context->urb.cs_start + + pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size); + pp_context->intel_post_processing = ironlake_post_processing; + } else { + pp_context->vfe_gpu_state.max_num_threads = 60; + pp_context->vfe_gpu_state.num_urb_entries = 59; + pp_context->vfe_gpu_state.gpgpu_mode = 0; + pp_context->vfe_gpu_state.urb_entry_size = 16 - 1; + pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE; + pp_context->intel_post_processing = gen6_post_processing; + } - pp_context->urb.vfe_start = 0; - pp_context->urb.cs_start = pp_context->urb.vfe_start + - 
pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry; - assert(pp_context->urb.cs_start + - pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel))); + pp_context->finalize = i965_post_processing_context_finalize; assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7)); assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75)); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules)); - else if (IS_GEN7(i965->intel.device_id)) + else if (IS_GEN7(i965->intel.device_info)) memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules)); - else if (IS_GEN6(i965->intel.device_id)) + else if (IS_GEN6(i965->intel.device_info)) memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules)); - else if (IS_IRONLAKE(i965->intel.device_id)) + else if (IS_IRONLAKE(i965->intel.device_info)) memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules)); for (i = 0; i < NUM_PP_MODULES; i++) { @@ -5127,7 +5319,7 @@ i965_post_processing_context_init(VADriverContextP ctx, } /* static & inline parameters */ - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info)) { pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1); pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1); } else { @@ -5135,6 +5327,9 @@ i965_post_processing_context_init(VADriverContextP ctx, pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1); } + pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE; + pp_context->pp_dndi_context.current_out_obj_surface = NULL; + pp_context->pp_dndi_context.frame_order = -1; pp_context->batch = batch; } @@ -5144,10 +5339,10 @@ 
i965_post_processing_init(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_post_processing_context *pp_context = i965->pp_context; - if (HAS_PP(i965)) { + if (HAS_VPP(i965)) { if (pp_context == NULL) { pp_context = calloc(1, sizeof(*pp_context)); - i965_post_processing_context_init(ctx, pp_context, i965->batch); + i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch); i965->pp_context = pp_context; } } @@ -5189,14 +5384,28 @@ i965_proc_picture(VADriverContextP ctx, unsigned int tiling = 0, swizzle = 0; int in_width, in_height; - assert(pipeline_param->surface != VA_INVALID_ID); - assert(proc_state->current_render_target != VA_INVALID_ID); + if (pipeline_param->surface == VA_INVALID_ID || + proc_state->current_render_target == VA_INVALID_ID) { + status = VA_STATUS_ERROR_INVALID_SURFACE; + goto error; + } obj_surface = SURFACE(pipeline_param->surface); - assert(obj_surface && obj_surface->bo); - if (!obj_surface || !obj_surface->bo) + if (!obj_surface) { + status = VA_STATUS_ERROR_INVALID_SURFACE; + goto error; + } + + if (!obj_surface->bo) { + status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */ + goto error; + } + + if (pipeline_param->num_filters && !pipeline_param->filters) { + status = VA_STATUS_ERROR_INVALID_PARAMETER; goto error; + } in_width = obj_surface->orig_width; in_height = obj_surface->orig_height; @@ -5207,7 +5416,7 @@ i965_proc_picture(VADriverContextP ctx, src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3]; VASurfaceID out_surface_id = VA_INVALID_ID; - if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc != VA_FOURCC_NV12) { src_surface.base = (struct object_base *)obj_surface; src_surface.type = I965_SURFACE_TYPE_SURFACE; src_surface.flags = I965_SURFACE_FLAG_FRAME; @@ -5226,7 +5435,7 @@ i965_proc_picture(VADriverContextP ctx, tmp_surfaces[num_tmp_surfaces++] = out_surface_id; 
obj_surface = SURFACE(out_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)obj_surface; dst_surface.type = I965_SURFACE_TYPE_SURFACE; @@ -5272,16 +5481,20 @@ i965_proc_picture(VADriverContextP ctx, dst_rect.height = in_height; } + proc_context->pp_context.pipeline_param = pipeline_param; + for (i = 0; i < pipeline_param->num_filters; i++) { struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]); VAProcFilterParameterBufferBase *filter_param = NULL; VAProcFilterType filter_type; int kernel_index; - assert(obj_buffer && obj_buffer->buffer_store); - - if (!obj_buffer || !obj_buffer->buffer_store) + if (!obj_buffer || + !obj_buffer->buffer_store || + !obj_buffer->buffer_store->buffer) { + status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN; goto error; + } out_surface_id = VA_INVALID_ID; filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer; @@ -5300,7 +5513,7 @@ i965_proc_picture(VADriverContextP ctx, tmp_surfaces[num_tmp_surfaces++] = out_surface_id; obj_surface = SURFACE(out_surface_id); assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)obj_surface; dst_surface.type = I965_SURFACE_TYPE_SURFACE; status = i965_post_processing_internal(ctx, &proc_context->pp_context, @@ -5319,14 +5532,16 @@ i965_proc_picture(VADriverContextP ctx, } } + proc_context->pp_context.pipeline_param = NULL; obj_surface = SURFACE(proc_state->current_render_target); - assert(obj_surface); - if (!obj_surface) + if (!obj_surface) { + status = VA_STATUS_ERROR_INVALID_SURFACE; goto error; + } int csc_needed = 0; - if 
(obj_surface->fourcc && obj_surface->fourcc != VA_FOURCC('N','V','1','2')){ + if (obj_surface->fourcc && obj_surface->fourcc != VA_FOURCC_NV12){ csc_needed = 1; out_surface_id = VA_INVALID_ID; status = i965_CreateSurfaces(ctx, @@ -5339,10 +5554,10 @@ i965_proc_picture(VADriverContextP ctx, tmp_surfaces[num_tmp_surfaces++] = out_surface_id; struct object_surface *csc_surface = SURFACE(out_surface_id); assert(csc_surface); - i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)csc_surface; } else { - i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)obj_surface; } @@ -5397,7 +5612,7 @@ error: tmp_surfaces, num_tmp_surfaces); - return VA_STATUS_ERROR_INVALID_PARAMETER; + return status; } static void @@ -5413,13 +5628,14 @@ i965_proc_context_destroy(void *hw_context) struct hw_context * i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config) { + struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_driver_data *intel = intel_driver_data(ctx); struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context)); proc_context->base.destroy = i965_proc_context_destroy; proc_context->base.run = i965_proc_picture; proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0); - i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch); + i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch); return (struct hw_context *)proc_context; } diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h index 66fcdef..f0a277e 100755 --- 
a/src/i965_post_processing.h +++ b/src/i965_post_processing.h @@ -51,6 +51,7 @@ enum PP_PL3_LOAD_SAVE_PA, PP_PA_LOAD_SAVE_NV12, PP_PA_LOAD_SAVE_PL3, + PP_PA_LOAD_SAVE_PA, PP_RGBX_LOAD_SAVE_NV12, PP_NV12_LOAD_SAVE_RGBX, NUM_PP_MODULES, @@ -93,12 +94,17 @@ struct pp_dndi_context { int dest_w; int dest_h; + dri_bo *stmm_bo; + int frame_order; /* -1 for the first frame */ + VASurfaceID current_out_surface; + struct object_surface *current_out_obj_surface; }; struct pp_dn_context { int dest_w; int dest_h; + dri_bo *stmm_bo; }; struct i965_post_processing_context; @@ -374,7 +380,7 @@ struct gen7_pp_static_parameter unsigned int di_destination_packed_y_component_offset:8; unsigned int di_destination_packed_u_component_offset:8; unsigned int di_destination_packed_v_component_offset:8; - unsigned int pad0:8; + unsigned int alpha:8; } grf2; struct { @@ -472,16 +478,21 @@ struct i965_post_processing_context } urb; struct { - dri_bo *bo; - } stmm; - - union { - struct pp_load_save_context pp_load_save_context; - struct pp_scaling_context pp_scaling_context; - struct pp_avs_context pp_avs_context; - struct pp_dndi_context pp_dndi_context; - struct pp_dn_context pp_dn_context; - } private_context; + unsigned int gpgpu_mode : 1; + unsigned int pad0 : 7; + unsigned int max_num_threads : 16; + unsigned int num_urb_entries : 8; + unsigned int urb_entry_size : 16; + unsigned int curbe_allocation_size : 16; + } vfe_gpu_state; + + struct pp_load_save_context pp_load_save_context; + struct pp_scaling_context pp_scaling_context; + struct pp_avs_context pp_avs_context; + struct pp_dndi_context pp_dndi_context; + struct pp_dn_context pp_dn_context; + void *private_context; /* pointer to the current private context */ + void *pipeline_param; /* pointer to the pipeline parameter */ int (*pp_x_steps)(void *private_context); int (*pp_y_steps)(void *private_context); @@ -492,6 +503,39 @@ struct i965_post_processing_context unsigned int block_horizontal_mask_left:16; unsigned int 
block_horizontal_mask_right:16; unsigned int block_vertical_mask_bottom:8; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } instruction_state; + + struct { + dri_bo *bo; + } indirect_state; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } dynamic_state; + + unsigned int sampler_offset; + int sampler_size; + unsigned int idrt_offset; + int idrt_size; + unsigned int curbe_offset; + int curbe_size; + + VAStatus (*intel_post_processing)(VADriverContextP ctx, + struct i965_post_processing_context *pp_context, + const struct i965_surface *src_surface, + const VARectangle *src_rect, + struct i965_surface *dst_surface, + const VARectangle *dst_rect, + int pp_index, + void * filter_param); + void (*finalize)(struct i965_post_processing_context *pp_context); }; struct i965_proc_context @@ -532,4 +576,11 @@ i965_post_processing_terminate(VADriverContextP ctx); bool i965_post_processing_init(VADriverContextP ctx); + +extern VAStatus +i965_proc_picture(VADriverContextP ctx, + VAProfile profile, + union codec_state *codec_state, + struct hw_context *hw_context); + #endif /* __I965_POST_PROCESSING_H__ */ diff --git a/src/i965_render.c b/src/i965_render.c index 21ec844..38f70eb 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -35,6 +35,7 @@ #include <stdlib.h> #include <string.h> #include <assert.h> +#include <math.h> #include <va/va_drmcommon.h> @@ -54,7 +55,7 @@ static const uint32_t sf_kernel_static[][4] = #include "shaders/render/exa_sf.g4b" }; -#define PS_KERNEL_NUM_GRF 32 +#define PS_KERNEL_NUM_GRF 48 #define PS_MAX_THREADS 32 #define I965_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) @@ -64,6 +65,7 @@ static const uint32_t ps_kernel_static[][4] = #include "shaders/render/exa_wm_xy.g4b" #include "shaders/render/exa_wm_src_affine.g4b" #include "shaders/render/exa_wm_src_sample_planar.g4b" +#include "shaders/render/exa_wm_yuv_color_balance.g4b" #include "shaders/render/exa_wm_yuv_rgb.g4b" #include 
"shaders/render/exa_wm_write.g4b" }; @@ -86,6 +88,7 @@ static const uint32_t ps_kernel_static_gen5[][4] = #include "shaders/render/exa_wm_xy.g4b.gen5" #include "shaders/render/exa_wm_src_affine.g4b.gen5" #include "shaders/render/exa_wm_src_sample_planar.g4b.gen5" +#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5" #include "shaders/render/exa_wm_yuv_rgb.g4b.gen5" #include "shaders/render/exa_wm_write.g4b.gen5" }; @@ -105,6 +108,7 @@ static const uint32_t sf_kernel_static_gen6[][4] = static const uint32_t ps_kernel_static_gen6[][4] = { #include "shaders/render/exa_wm_src_affine.g6b" #include "shaders/render/exa_wm_src_sample_planar.g6b" +#include "shaders/render/exa_wm_yuv_color_balance.g6b" #include "shaders/render/exa_wm_yuv_rgb.g6b" #include "shaders/render/exa_wm_write.g6b" }; @@ -123,6 +127,7 @@ static const uint32_t sf_kernel_static_gen7[][4] = static const uint32_t ps_kernel_static_gen7[][4] = { #include "shaders/render/exa_wm_src_affine.g7b" #include "shaders/render/exa_wm_src_sample_planar.g7b" +#include "shaders/render/exa_wm_yuv_color_balance.g7b" #include "shaders/render/exa_wm_yuv_rgb.g7b" #include "shaders/render/exa_wm_write.g7b" }; @@ -137,13 +142,14 @@ static const uint32_t ps_subpic_kernel_static_gen7[][4] = { static const uint32_t ps_kernel_static_gen7_haswell[][4] = { #include "shaders/render/exa_wm_src_affine.g7b" #include "shaders/render/exa_wm_src_sample_planar.g7b.haswell" +#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell" #include "shaders/render/exa_wm_yuv_rgb.g7b" #include "shaders/render/exa_wm_write.g7b" }; -#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct i965_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) -#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) + +#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) + #define SURFACE_STATE_OFFSET(index) 
(SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES) @@ -302,8 +308,26 @@ static struct i965_kernel render_kernels_gen7_haswell[] = { #define URB_SF_ENTRIES 1 #define URB_SF_ENTRY_SIZE 2 -#define URB_CS_ENTRIES 1 -#define URB_CS_ENTRY_SIZE 1 +#define URB_CS_ENTRIES 4 +#define URB_CS_ENTRY_SIZE 4 + +static float yuv_to_rgb_bt601[3][4] = { +{1.164, 0, 1.596, -0.06275,}, +{1.164, -0.392, -0.813, -0.50196,}, +{1.164, 2.017, 0, -0.50196,}, +}; + +static float yuv_to_rgb_bt709[3][4] = { +{1.164, 0, 1.793, -0.06275,}, +{1.164, -0.213, -0.533, -0.50196,}, +{1.164, 2.112, 0, -0.50196,}, +}; + +static float yuv_to_rgb_smpte_240[3][4] = { +{1.164, 0, 1.794, -0.06275,}, +{1.164, -0.258, -0.5425, -0.50196,}, +{1.164, 2.078, 0, -0.50196,}, +}; static void i965_render_vs_unit(VADriverContextP ctx) @@ -317,7 +341,7 @@ i965_render_vs_unit(VADriverContextP ctx) vs_state = render_state->vs.state->virtual; memset(vs_state, 0, sizeof(*vs_state)); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; else vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES; @@ -431,7 +455,7 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->thread1.single_program_flow = 1; /* XXX */ - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm_state->thread1.binding_table_entry_count = 7; @@ -439,8 +463,8 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->thread2.scratch_space_base_pointer = 0; wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ - wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 0; + wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; 
wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -448,13 +472,13 @@ i965_subpic_render_wm_unit(VADriverContextP ctx) wm_state->wm4.stats_enable = 0; wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { wm_state->wm4.sampler_count = 0; /* hardware requirement */ } else { wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; } - wm_state->wm5.max_threads = render_state->max_wm_threads - 1; + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -495,7 +519,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->thread1.single_program_flow = 1; /* XXX */ - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm_state->thread1.binding_table_entry_count = 7; @@ -504,7 +528,7 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */ wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */ - wm_state->thread3.const_urb_entry_read_length = 1; + wm_state->thread3.const_urb_entry_read_length = 4; wm_state->thread3.const_urb_entry_read_offset = 0; wm_state->thread3.urb_entry_read_length = 1; /* XXX */ wm_state->thread3.urb_entry_read_offset = 0; /* XXX */ @@ -512,13 +536,13 @@ i965_render_wm_unit(VADriverContextP ctx) wm_state->wm4.stats_enable = 0; wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { wm_state->wm4.sampler_count = 0; /* hardware requirement */ } else { wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4; } - wm_state->wm5.max_threads = 
render_state->max_wm_threads - 1; + wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1; wm_state->wm5.thread_dispatch_enable = 1; wm_state->wm5.enable_16_pix = 1; wm_state->wm5.enable_8_pix = 0; @@ -779,6 +803,7 @@ gen7_render_set_surface_state( gen7_render_set_surface_tiling(ss, tiling); } + static void i965_render_src_surface_state( VADriverContextP ctx, @@ -803,12 +828,12 @@ i965_render_src_surface_state( assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info)) { gen7_render_set_surface_state(ss, region, offset, w, h, pitch, format, flags); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) gen7_render_set_surface_scs(ss); dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_SAMPLER, 0, @@ -851,7 +876,10 @@ i965_render_src_surfaces_state( i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */ i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) { + if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */ + return; + + if (obj_surface->fourcc == VA_FOURCC_NV12) { i965_render_src_surface_state(ctx, 3, region, region_pitch * obj_surface->y_cb_offset, obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch, @@ -918,12 +946,12 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index) assert(ss_bo->virtual); ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index); - if (IS_GEN7(i965->intel.device_id)) { + if (IS_GEN7(i965->intel.device_info)) { gen7_render_set_surface_state(ss, dest_region->bo, 0, dest_region->width, dest_region->height, dest_region->pitch, format, 0); - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) gen7_render_set_surface_scs(ss); 
dri_bo_emit_reloc(ss_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, @@ -1050,29 +1078,62 @@ i965_render_upload_vertex( i965_fill_vertex_buffer(ctx, tex_coords, vid_coords); } +#define PI 3.1415926 + static void i965_render_upload_constants(VADriverContextP ctx, - struct object_surface *obj_surface) + struct object_surface *obj_surface, + unsigned int flags) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; unsigned short *constant_buffer; + float *color_balance_base; + float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST; + float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */ + float hue = (float)i965->hue_attrib->value / 180 * PI; + float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION; + float *yuv_to_rgb; + unsigned int color_flag; dri_bo_map(render_state->curbe.bo, 1); assert(render_state->curbe.bo->virtual); constant_buffer = render_state->curbe.bo->virtual; if (obj_surface->subsampling == SUBSAMPLE_YUV400) { - assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0')); + assert(obj_surface->fourcc == VA_FOURCC_Y800); - *constant_buffer = 2; + constant_buffer[0] = 2; } else { - if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) - *constant_buffer = 1; + if (obj_surface->fourcc == VA_FOURCC_NV12) + constant_buffer[0] = 1; else - *constant_buffer = 0; + constant_buffer[0] = 0; } + if (i965->contrast_attrib->value == DEFAULT_CONTRAST && + i965->brightness_attrib->value == DEFAULT_BRIGHTNESS && + i965->hue_attrib->value == DEFAULT_HUE && + i965->saturation_attrib->value == DEFAULT_SATURATION) + constant_buffer[1] = 1; /* skip color balance transformation */ + else + constant_buffer[1] = 0; + + color_balance_base = (float *)constant_buffer + 4; + *color_balance_base++ = contrast; + *color_balance_base++ = brightness; + *color_balance_base++ = cos(hue) * contrast * saturation; + 
*color_balance_base++ = sin(hue) * contrast * saturation; + + color_flag = flags & VA_SRC_COLOR_MASK; + yuv_to_rgb = (float *)constant_buffer + 8; + if (color_flag == VA_SRC_BT709) + memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709)); + else if (color_flag == VA_SRC_SMPTE_240) + memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240)); + else + memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601)); + dri_bo_unmap(render_state->curbe.bo); } @@ -1118,7 +1179,7 @@ i965_surface_render_state_setup( i965_render_cc_viewport(ctx); i965_render_cc_unit(ctx); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); - i965_render_upload_constants(ctx, obj_surface); + i965_render_upload_constants(ctx, obj_surface, flags); } static void @@ -1172,7 +1233,7 @@ i965_render_state_base_address(VADriverContextP ctx) struct intel_batchbuffer *batch = i965->batch; struct i965_render_state *render_state = &i965->render_state; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 8); OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6); OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); @@ -1336,7 +1397,7 @@ i965_render_vertex_elements(VADriverContextP ctx) struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = i965->batch; - if (IS_IRONLAKE(i965->intel.device_id)) { + if (IS_IRONLAKE(i965->intel.device_info)) { BEGIN_BATCH(batch, 5); OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3); /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ @@ -1428,7 +1489,7 @@ i965_render_startup(VADriverContextP ctx) ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); - if (IS_IRONLAKE(i965->intel.device_id)) + if (IS_IRONLAKE(i965->intel.device_info)) OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); else OUT_BATCH(batch, 3); @@ -1478,8 +1539,8 @@ i965_clear_dest_region(VADriverContextP ctx) br13 |= pitch; - if 
(IS_GEN6(i965->intel.device_id) || - IS_GEN7(i965->intel.device_id)) { + if (IS_GEN6(i965->intel.device_info) || + IS_GEN7(i965->intel.device_info)) { intel_batchbuffer_start_atomic_blt(batch, 24); BEGIN_BLT_BATCH(batch, 6); } else { @@ -1540,6 +1601,7 @@ i965_subpic_render_pipeline_setup(VADriverContextP ctx) i965_render_pipelined_pointers(ctx); i965_render_urb_layout(ctx); i965_render_cs_urb_layout(ctx); + i965_render_constant_buffer(ctx); i965_render_drawing_rectangle(ctx); i965_render_vertex_elements(ctx); i965_render_startup(ctx); @@ -1805,7 +1867,7 @@ gen6_render_setup_states( gen6_render_color_calc_state(ctx); gen6_render_blend_state(ctx); gen6_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, obj_surface); + i965_render_upload_constants(ctx, obj_surface, flags); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } @@ -2044,7 +2106,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel) OUT_RELOC(batch, render_state->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); + (URB_CS_ENTRY_SIZE-1)); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); @@ -2057,7 +2119,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel) (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ - OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | GEN6_3DSTATE_WM_DISPATCH_ENABLE | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | @@ -2307,6 +2369,11 @@ gen7_render_initialize(VADriverContextP ctx) render_state->cc.depth_stencil = bo; } +/* + * for GEN8 + */ +#define ALIGNMENT 64 + static void gen7_render_color_calc_state(VADriverContextP ctx) { @@ -2383,6 +2450,7 @@ gen7_render_sampler(VADriverContextP ctx) dri_bo_unmap(render_state->wm.sampler); 
} + static void gen7_render_setup_states( VADriverContextP ctx, @@ -2399,10 +2467,11 @@ gen7_render_setup_states( gen7_render_color_calc_state(ctx); gen7_render_blend_state(ctx); gen7_render_depth_stencil_state(ctx); - i965_render_upload_constants(ctx, obj_surface); + i965_render_upload_constants(ctx, obj_surface, flags); i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect); } + static void gen7_emit_invarient_states(VADriverContextP ctx) { @@ -2486,7 +2555,7 @@ gen7_emit_urb(VADriverContextP ctx) struct intel_batchbuffer *batch = i965->batch; unsigned int num_urb_entries = 32; - if (IS_HASWELL(i965->intel.device_id)) + if (IS_HASWELL(i965->intel.device_info)) num_urb_entries = 64; BEGIN_BATCH(batch, 2); @@ -2795,7 +2864,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; unsigned int num_samples = 0; - if (IS_HASWELL(i965->intel.device_id)) { + if (IS_HASWELL(i965->intel.device_info)) { max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; } @@ -2810,7 +2879,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) BEGIN_BATCH(batch, 7); OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); - OUT_BATCH(batch, 1); + OUT_BATCH(batch, URB_CS_ENTRY_SIZE); OUT_BATCH(batch, 0); OUT_RELOC(batch, render_state->curbe.bo, @@ -2832,7 +2901,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel) (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(batch, 0); /* scratch space base offset */ OUT_BATCH(batch, - ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples | + ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples | GEN7_PS_PUSH_CONSTANT_ENABLE | GEN7_PS_ATTRIBUTE_ENABLE | GEN7_PS_16_DISPATCH_ENABLE); @@ -2930,6 +2999,7 @@ gen7_render_emit_states(VADriverContextP ctx, int kernel) intel_batchbuffer_end_atomic(batch); } + static void gen7_render_put_surface( VADriverContextP ctx, @@ -2949,6 
+3019,7 @@ gen7_render_put_surface( intel_batchbuffer_flush(batch); } + static void gen7_subpicture_render_blend_state(VADriverContextP ctx) { @@ -3012,13 +3083,6 @@ gen7_render_put_subpicture( } -/* - * global functions - */ -VAStatus -i965_DestroySurfaces(VADriverContextP ctx, - VASurfaceID *surface_list, - int num_surfaces); void intel_render_put_surface( VADriverContextP ctx, @@ -3029,6 +3093,7 @@ intel_render_put_surface( ) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; int has_done_scaling = 0; VASurfaceID out_surface_id = i965_post_processing(ctx, obj_surface, @@ -3049,12 +3114,7 @@ intel_render_put_surface( src_rect = dst_rect; } - if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); - else - i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); + render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags); if (out_surface_id != VA_INVALID_ID) i965_DestroySurfaces(ctx, &out_surface_id, 1); @@ -3069,17 +3129,57 @@ intel_render_put_subpicture( ) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; - if (IS_GEN7(i965->intel.device_id)) - gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); - else if (IS_GEN6(i965->intel.device_id)) - gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); - else - i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); + render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect); +} + +static void +genx_render_terminate(VADriverContextP ctx) +{ + int i; + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; + + dri_bo_unreference(render_state->curbe.bo); + 
render_state->curbe.bo = NULL; + + for (i = 0; i < NUM_RENDER_KERNEL; i++) { + struct i965_kernel *kernel = &render_state->render_kernels[i]; + + dri_bo_unreference(kernel->bo); + kernel->bo = NULL; + } + + dri_bo_unreference(render_state->vb.vertex_buffer); + render_state->vb.vertex_buffer = NULL; + dri_bo_unreference(render_state->vs.state); + render_state->vs.state = NULL; + dri_bo_unreference(render_state->sf.state); + render_state->sf.state = NULL; + dri_bo_unreference(render_state->wm.sampler); + render_state->wm.sampler = NULL; + dri_bo_unreference(render_state->wm.state); + render_state->wm.state = NULL; + dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); + dri_bo_unreference(render_state->cc.viewport); + render_state->cc.viewport = NULL; + dri_bo_unreference(render_state->cc.state); + render_state->cc.state = NULL; + dri_bo_unreference(render_state->cc.blend); + render_state->cc.blend = NULL; + dri_bo_unreference(render_state->cc.depth_stencil); + render_state->cc.depth_stencil = NULL; + + if (render_state->draw_region) { + dri_bo_unreference(render_state->draw_region->bo); + free(render_state->draw_region); + render_state->draw_region = NULL; + } } bool -i965_render_init(VADriverContextP ctx) +genx_render_init(VADriverContextP ctx) { struct i965_driver_data *i965 = i965_driver_data(ctx); struct i965_render_state *render_state = &i965->render_state; @@ -3091,16 +3191,27 @@ i965_render_init(VADriverContextP ctx) assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / sizeof(render_kernels_gen6[0]))); - if (IS_GEN7(i965->intel.device_id)) + if (IS_GEN7(i965->intel.device_info)) { memcpy(render_state->render_kernels, - (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7), + (IS_HASWELL(i965->intel.device_info) ? 
render_kernels_gen7_haswell : render_kernels_gen7), sizeof(render_state->render_kernels)); - else if (IS_GEN6(i965->intel.device_id)) + render_state->render_put_surface = gen7_render_put_surface; + render_state->render_put_subpicture = gen7_render_put_subpicture; + } else if (IS_GEN6(i965->intel.device_info)) { memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels)); - else if (IS_IRONLAKE(i965->intel.device_id)) + render_state->render_put_surface = gen6_render_put_surface; + render_state->render_put_subpicture = gen6_render_put_subpicture; + } else if (IS_IRONLAKE(i965->intel.device_info)) { memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels)); - else + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } else { memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels)); + render_state->render_put_surface = i965_render_put_surface; + render_state->render_put_subpicture = i965_render_put_subpicture; + } + + render_state->render_terminate = genx_render_terminate; for (i = 0; i < NUM_RENDER_KERNEL; i++) { struct i965_kernel *kernel = &render_state->render_kernels[i]; @@ -3121,73 +3232,22 @@ i965_render_init(VADriverContextP ctx) 4096, 64); assert(render_state->curbe.bo); - if (IS_HSW_GT1(i965->intel.device_id)) { - render_state->max_wm_threads = 102; - } else if (IS_HSW_GT2(i965->intel.device_id)) { - render_state->max_wm_threads = 204; - } else if (IS_HSW_GT3(i965->intel.device_id)) { - render_state->max_wm_threads = 408; - } else if (IS_IVB_GT1(i965->intel.device_id)) { - render_state->max_wm_threads = 48; - } else if (IS_IVB_GT2(i965->intel.device_id)) { - render_state->max_wm_threads = 172; - } else if (IS_SNB_GT1(i965->intel.device_id)) { - render_state->max_wm_threads = 40; - } else if (IS_SNB_GT2(i965->intel.device_id)) { - render_state->max_wm_threads = 80; - } 
else if (IS_IRONLAKE(i965->intel.device_id)) { - render_state->max_wm_threads = 72; /* 12 * 6 */ - } else if (IS_G4X(i965->intel.device_id)) { - render_state->max_wm_threads = 50; /* 12 * 5 */ - } else { - /* should never get here !!! */ - assert(0); - } - return true; } -void -i965_render_terminate(VADriverContextP ctx) +bool +i965_render_init(VADriverContextP ctx) { - int i; struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_render_state *render_state = &i965->render_state; - - dri_bo_unreference(render_state->curbe.bo); - render_state->curbe.bo = NULL; - for (i = 0; i < NUM_RENDER_KERNEL; i++) { - struct i965_kernel *kernel = &render_state->render_kernels[i]; - - dri_bo_unreference(kernel->bo); - kernel->bo = NULL; - } + return i965->codec_info->render_init(ctx); +} - dri_bo_unreference(render_state->vb.vertex_buffer); - render_state->vb.vertex_buffer = NULL; - dri_bo_unreference(render_state->vs.state); - render_state->vs.state = NULL; - dri_bo_unreference(render_state->sf.state); - render_state->sf.state = NULL; - dri_bo_unreference(render_state->wm.sampler); - render_state->wm.sampler = NULL; - dri_bo_unreference(render_state->wm.state); - render_state->wm.state = NULL; - dri_bo_unreference(render_state->wm.surface_state_binding_table_bo); - dri_bo_unreference(render_state->cc.viewport); - render_state->cc.viewport = NULL; - dri_bo_unreference(render_state->cc.state); - render_state->cc.state = NULL; - dri_bo_unreference(render_state->cc.blend); - render_state->cc.blend = NULL; - dri_bo_unreference(render_state->cc.depth_stencil); - render_state->cc.depth_stencil = NULL; +void +i965_render_terminate(VADriverContextP ctx) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_render_state *render_state = &i965->render_state; - if (render_state->draw_region) { - dri_bo_unreference(render_state->draw_region->bo); - free(render_state->draw_region); - render_state->draw_region = NULL; - } + render_state->render_terminate(ctx); 
} - diff --git a/src/i965_render.h b/src/i965_render.h index f09b535..fde398b 100644 --- a/src/i965_render.h +++ b/src/i965_render.h @@ -33,6 +33,8 @@ #define NUM_RENDER_KERNEL 3 +#define VA_SRC_COLOR_MASK 0x000000f0 + #include "i965_post_processing.h" struct i965_kernel; @@ -77,7 +79,51 @@ struct i965_render_state struct i965_kernel render_kernels[3]; - int max_wm_threads; + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } instruction_state; + + struct { + dri_bo *bo; + } indirect_state; + + struct { + dri_bo *bo; + int bo_size; + unsigned int end_offset; + } dynamic_state; + + unsigned int curbe_offset; + int curbe_size; + + unsigned int sampler_offset; + int sampler_size; + + unsigned int cc_viewport_offset; + int cc_viewport_size; + + unsigned int cc_state_offset; + int cc_state_size; + + unsigned int blend_state_offset; + int blend_state_size; + + unsigned int sf_clip_offset; + int sf_clip_size; + + unsigned int scissor_offset; + int scissor_size; + + void (*render_put_surface)(VADriverContextP ctx, struct object_surface *, + const VARectangle *src_rec, + const VARectangle *dst_rect, + unsigned int flags); + void (*render_put_subpicture)(VADriverContextP ctx, struct object_surface *, + const VARectangle *src_rec, + const VARectangle *dst_rect); + void (*render_terminate)(VADriverContextP ctx); }; bool i965_render_init(VADriverContextP ctx); @@ -105,4 +151,10 @@ struct gen7_surface_state; void gen7_render_set_surface_scs(struct gen7_surface_state *ss); +struct gen8_surface_state; +void +gen8_render_set_surface_scs(struct gen8_surface_state *ss); + +extern bool gen8_render_init(VADriverContextP ctx); + #endif /* _I965_RENDER_H_ */ diff --git a/src/i965_structs.h b/src/i965_structs.h index c7dd272..682dc2d 100644 --- a/src/i965_structs.h +++ b/src/i965_structs.h @@ -968,6 +968,364 @@ struct i965_sampler_dndi } dw7; }; +struct gen8_interface_descriptor_data +{ + struct { + unsigned int pad0:6; + unsigned int kernel_start_pointer:26; + } desc0; 
+ + struct { + unsigned int kernel_start_pointer_high:16; + unsigned int pad0:16; + } desc1; + + struct { + unsigned int pad0:7; + unsigned int software_exception_enable:1; + unsigned int pad1:3; + unsigned int maskstack_exception_enable:1; + unsigned int pad2:1; + unsigned int illegal_opcode_exception_enable:1; + unsigned int pad3:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int single_program_flow:1; + unsigned int denorm_mode:1; + unsigned int pad4:12; + } desc2; + + struct { + unsigned int pad0:2; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } desc3; + + struct { + unsigned int binding_table_entry_count:5; + unsigned int binding_table_pointer:11; + unsigned int pad0: 16; + } desc4; + + struct { + unsigned int constant_urb_entry_read_offset:16; + unsigned int constant_urb_entry_read_length:16; + } desc5; + + struct { + unsigned int num_threads_in_tg:10; + unsigned int pad0:5; + unsigned int global_barrier_enable:1; + unsigned int shared_local_memory_size:5; + unsigned int barrier_enable:1; + unsigned int rounding_mode:2; + unsigned int pad1:8; + } desc6; + + struct { + unsigned int cross_thread_constant_data_read_length:8; + unsigned int pad0:24; + } desc7; +}; + +struct gen8_surface_state +{ + struct { + unsigned int cube_pos_z:1; + unsigned int cube_neg_z:1; + unsigned int cube_pos_y:1; + unsigned int cube_neg_y:1; + unsigned int cube_pos_x:1; + unsigned int cube_neg_x:1; + unsigned int media_boundary_pixel_mode:2; + unsigned int render_cache_read_write:1; + unsigned int sampler_l2bypass_disable:1; + unsigned int vert_line_stride_ofs:1; + unsigned int vert_line_stride:1; + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int horizontal_alignment:2; + /* Field 16 */ + unsigned int vertical_alignment:2; + unsigned int surface_format:9; /**< BRW_SURFACEFORMAT_x */ + unsigned int pad0:1; + unsigned int is_array:1; + unsigned int surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE 
*/ + } ss0; + + struct { + unsigned int surface_qpitch:15; + unsigned int pad0:4; + unsigned int base_mip_level:5; + unsigned int surface_mocs:7; + unsigned int pad1:1; + } ss1; + + struct { + unsigned int width:14; + unsigned int pad0:2; + unsigned int height:14; + unsigned int pad1:2; + } ss2; + + struct { + unsigned int pitch:18; + unsigned int pad:3; + unsigned int depth:11; + } ss3; + + struct { + unsigned int multisample_position_palette_index:3; + unsigned int num_multisamples:3; + unsigned int multisampled_surface_storage_format:1; + unsigned int render_target_view_extent:11; + unsigned int min_array_elt:11; + unsigned int rotation:2; + unsigned int force_ncmp_reduce_type:1; + } ss4; + + struct { + unsigned int mip_count:4; + unsigned int min_lod:4; + unsigned int pad0:4; + unsigned int pad1:2; + unsigned int coherence_type:1; + unsigned int pad2:3; + unsigned int pad3:2; + unsigned int ewa_disable_cube:1; + unsigned int y_offset:3; + unsigned int pad4:1; + unsigned int x_offset:7; + } ss5; + + struct { + unsigned int y_offset_uv_plane:14; + unsigned int pad0:2; + unsigned int x_offset_uv_plane:14; + unsigned int pad1:1; + unsigned int separate_uv_plane:1; + } ss6; + + struct { + unsigned int resource_min_lod:12; + unsigned int pad0:4; + unsigned int shader_chanel_select_a:3; + unsigned int shader_chanel_select_b:3; + unsigned int shader_chanel_select_g:3; + unsigned int shader_chanel_select_r:3; + unsigned int alpha_clear_color:1; + unsigned int blue_clear_color:1; + unsigned int green_clear_color:1; + unsigned int red_clear_color:1; + } ss7; + struct { + unsigned int base_addr; + } ss8; + + struct { + unsigned int base_addr_high:16; + unsigned int pad0:16; + } ss9; + + struct { + unsigned int pad0:12; + unsigned int aux_base_addr:20; + } ss10; + + union { + struct { + unsigned int y_offset_v_plane:14; + unsigned int pad0:2; + unsigned int x_offset_v_plane:14; + unsigned int pad1:2; + } planar; + struct { + unsigned int aux_base_addr_high:16; + unsigned 
int pad2:16; + } aux_buffer; + } ss11; + + struct { + unsigned int hier_depth_clear; + } ss12; + + struct { + unsigned int pad0; + } ss13; + + struct { + unsigned int pad0; + } ss14; + + struct { + unsigned int pad0; + } ss15; +}; + +struct gen8_surface_state2 +{ + struct { + unsigned int pad0; + } ss0; + + struct { + unsigned int cbcr_pixel_offset_v_direction:2; + unsigned int picture_structure:2; + unsigned int width:14; + unsigned int height:14; + } ss1; + + struct { + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int half_pitch_for_chroma:1; + unsigned int pitch:18; + unsigned int address_ctrl:1; /* clamp or mirror mode */ + unsigned int pad0:4; + unsigned int interleave_chroma:1; + unsigned int surface_format:5; + } ss2; + + struct { + unsigned int y_offset_for_cb:14; + unsigned int pad0:2; + unsigned int x_offset_for_cb:14; + unsigned int pad1:2; + } ss3; + + struct { + unsigned int y_offset_for_cr:15; + unsigned int pad0:1; + unsigned int x_offset_for_cr:14; + unsigned int pad1:2; + } ss4; + + struct { + unsigned int surface_object_mocs:7; + unsigned int pad0:11; + unsigned int pad1:2; + unsigned int pad2:10; + unsigned int vert_line_stride_offset:1; + unsigned int vert_line_stride:1; + } ss5; + + struct { + unsigned int base_addr; + } ss6; + + struct { + unsigned int base_addr_high:16; + unsigned int pad0:16; + } ss7; +}; + +struct gen8_sampler_state +{ + struct + { + unsigned int aniso_algorithm:1; + unsigned int lod_bias:13; + unsigned int min_filter:3; + unsigned int mag_filter:3; + unsigned int mip_filter:2; + unsigned int base_level:5; + unsigned int lod_preclamp:2; + unsigned int default_color_mode:1; + unsigned int pad0:1; + unsigned int disable:1; + } ss0; + + struct + { + unsigned int cube_control_mode:1; + unsigned int shadow_function:3; + unsigned int chroma_key_mode:1; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int max_lod:12; + unsigned int min_lod:12; + } ss1; + + struct + { + 
unsigned int lod_clamp_mag_mode:1; /* MIPNONE or MIPFILTER */ + unsigned int flex_filter_vert_align:1; + unsigned int flex_filter_hort_align:1; + unsigned int flex_filter_coff_size:1; /* coff8 or coff 16 */ + unsigned int flex_filter_mode:1; + unsigned int pad0:1; + unsigned int indirect_state_pointer:18; /* point to the SAMPLE_INDIRECT_STATE */ + union { + unsigned char nonsep_filter_footer_highmask; + struct { + unsigned char pad1:2; + unsigned char sep_filter_height:2; + unsigned char sep_filter_width:2; + unsigned char sep_filter_coff_size:2; + } sep_filter; + } ss2_byte3; + } ss2; + + struct + { + unsigned int r_wrap_mode:3; + unsigned int t_wrap_mode:3; + unsigned int s_wrap_mode:3; + unsigned int pad0:1; + unsigned int non_normalized_coord:1; + unsigned int trilinear_quality:2; + unsigned int address_round:6; + unsigned int max_aniso:3; + unsigned int pad1:2; + unsigned int nonsep_filter_foot_lowmask:8; + } ss3; +}; + +struct gen8_global_blend_state +{ + unsigned int pad0:19; + unsigned int ydither_offset:2; + unsigned int xdither_offset:2; + unsigned int color_dither_enable:1; + unsigned int alpha_test_func:3; + unsigned int alpha_test_enable:1; + unsigned int alpha_to_coverage_dither:1; + unsigned int alpha_to_one:1; + unsigned int ia_blend_enable:1; + unsigned int alpha_to_coverage:1; +}; + +struct gen8_blend_state_rt { + struct { + unsigned int blue_write_dis:1; + unsigned int green_write_dis:1; + unsigned int red_write_dis:1; + unsigned int alpha_write_dis:1; + unsigned int pad0:1; + unsigned int alpha_blend_func:3; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_src_blend_factor:5; + unsigned int color_blend_func:3; + unsigned int dest_blend_factor:5; + unsigned int src_blend_factor:5; + unsigned int colorbuf_blend:1; + } blend0; + + struct { + unsigned int post_blend_clamp_enable:1; + unsigned int pre_blend_clamp_enable:1; + unsigned int clamp_range:2; + unsigned int pre_blend_src_clamp:1; + unsigned int pad0:22; + unsigned int 
logic_op_func:4; + unsigned int logic_op_enable:1; + } blend1; +}; + +/* TODO: Add the sampler_8x8 for Gen8+. + * AVS/Convolve is 256DWs. + * MinMaxfilter/Erode/Dilate: 8DWs*/ + struct gen6_blend_state { @@ -1356,6 +1714,7 @@ struct gen7_sampler_8x8 } dw3; }; +/* This can also be used for BDW+ */ struct gen7_sampler_dndi { struct { @@ -1441,4 +1800,161 @@ struct gen7_sampler_dndi } dw7; }; +struct gen8_sampler_8x8_avs { + struct { + unsigned int gain_factor:6; + unsigned int weak_edge_threshold:6; + unsigned int strong_edge_threshold:6; + unsigned int r3x_coefficient:5; + unsigned int r3c_coefficient:5; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int pad1:1; + } dw0; + + struct { + unsigned int pad0; + } dw1; + + struct { + unsigned int global_noise_estimation:8; + unsigned int non_edge_weight:3; + unsigned int regular_weight:3; + unsigned int strong_edge_weight:3; + unsigned int r5x_coefficient:5; + unsigned int r5cx_coefficient:5; + unsigned int r5c_coefficient:5; + } dw2; + + struct { + unsigned int sin_alpha:8; /* S0.7 */ + unsigned int cos_alpha:8; /* S0.7 */ + unsigned int sat_max:6; + unsigned int hue_max:6; + unsigned int enable_8tap_filter:2; + unsigned int ief4_smooth_enable:1; + unsigned int skin_ief_enable:1; + } dw3; + + struct { + unsigned int s3u:11; /* S2.8 */ + unsigned int pad0:1; + unsigned int diamond_margin:3; + unsigned int vy_std_enable:1; + unsigned int umid:8; + unsigned int vmid:8; + } dw4; + + struct { + unsigned int diamond_dv:7; + unsigned int diamond_th:6; + unsigned int diamond_alpha:8; + unsigned int hs_margin:3; + unsigned int diamond_du:7; + unsigned int skin_detailfilter:1; + } dw5; + + struct { + unsigned int y_point1:8; + unsigned int y_point2:8; + unsigned int y_point3:8; + unsigned int y_point4:8; + } dw6; + + struct { + unsigned int inv_margin_vyl:16; + unsigned int pad0:16; + } dw7; + + struct { + unsigned int inv_margin_vyu:16; + unsigned int p0l:8; + unsigned int p1l:8; + } dw8; + + 
struct { + unsigned int p2l:8; + unsigned int p3l:8; + unsigned int b0l:8; + unsigned int b1l:8; + } dw9; + + struct { + unsigned int b2l:8; + unsigned int b3l:8; + unsigned int s0l:11; + unsigned int y_slope2:5; + } dw10; + + struct { + unsigned int s1l:11; + unsigned int s2l:11; + unsigned int pad0:10; + } dw11; + + struct { + unsigned int s3l:11; + unsigned int p0u:8; + unsigned int p1u:8; + unsigned int y_slope1:5; + } dw12; + + struct { + unsigned int p2u:8; + unsigned int p3u:8; + unsigned int b0u:8; + unsigned int b1u:8; + } dw13; + + struct { + unsigned int b2u:8; + unsigned int b3u:8; + unsigned int s0u:11; + unsigned int pad0:5; + } dw14; + + struct { + unsigned int s1u:11; + unsigned int s2u:11; + unsigned int pad0:10; + } dw15; + + /* DW16-DW151 */ + struct i965_sampler_8x8_coefficient coefficients[17]; + + struct { + unsigned int transition_area_with_8_pixels:3; + unsigned int pad0:1; + unsigned int transition_area_with_4_pixels:3; + unsigned int pad1:1; + unsigned int max_derivative_8_pixels:8; + unsigned int max_derivative_4_pixels:8; + unsigned int default_sharpness_level:8; + } dw152; + + struct { + unsigned int rgb_adaptive:1; + unsigned int adaptive_filter_for_all_channel:1; + unsigned int pad0:19; + unsigned int bypass_y_adaptive_filtering:1; + unsigned int bypass_x_adaptive_filtering:1; + unsigned int pad1:9; + } dw153; + + /* Reserved to 256DW */ + unsigned int reserved[102]; +}; + +#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7) + +#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, 
SURFACE_STATE_PADDED_SIZE_1_GEN6) + +#define SURFACE_STATE_PADDED_SIZE_0_GEN8 ALIGN(sizeof(struct gen8_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_1_GEN8 ALIGN(sizeof(struct gen8_surface_state2), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN8 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN8, SURFACE_STATE_PADDED_SIZE_1_GEN8) + #endif /* _I965_STRUCTS_H_ */ diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index 94d968c..c6d3769 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -86,6 +86,16 @@ intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size batch->intel = intel; batch->flag = flag; batch->run = drm_intel_bo_mrb_exec; + + if (IS_GEN6(intel->device_info) && + flag == I915_EXEC_RENDER) + batch->wa_render_bo = dri_bo_alloc(intel->bufmgr, + "wa scratch", + 4096, + 4096); + else + batch->wa_render_bo = NULL; + intel_batchbuffer_reset(batch, buffer_size); return batch; @@ -99,6 +109,7 @@ void intel_batchbuffer_free(struct intel_batchbuffer *batch) } dri_bo_unreference(batch->buffer); + dri_bo_unreference(batch->wa_render_bo); free(batch); } @@ -172,27 +183,69 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) { struct intel_driver_data *intel = batch->intel; - if (IS_GEN6(intel->device_id) || - IS_GEN7(intel->device_id)) { + if (IS_GEN6(intel->device_info) || + IS_GEN7(intel->device_info) || + IS_GEN8(intel->device_info)) { if (batch->flag == I915_EXEC_RENDER) { - BEGIN_BATCH(batch, 4); - OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2); + if (IS_GEN8(intel->device_info)) { + BEGIN_BATCH(batch, 6); + OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2)); - if (IS_GEN6(intel->device_id)) - OUT_BATCH(batch, + OUT_BATCH(batch, + CMD_PIPE_CONTROL_CS_STALL | + CMD_PIPE_CONTROL_WC_FLUSH | + CMD_PIPE_CONTROL_TC_FLUSH | + CMD_PIPE_CONTROL_DC_FLUSH | + CMD_PIPE_CONTROL_NOWRITE); + OUT_BATCH(batch, 0); /* write address */ + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); /* write data */ + OUT_BATCH(batch, 0); + 
ADVANCE_BATCH(batch); + } else if (IS_GEN6(intel->device_info)) { + assert(batch->wa_render_bo); + + BEGIN_BATCH(batch, 4 * 3); + + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(batch, + CMD_PIPE_CONTROL_CS_STALL | + CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(batch, 0); /* address */ + OUT_BATCH(batch, 0); /* write data */ + + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(batch, CMD_PIPE_CONTROL_WRITE_QWORD); + OUT_RELOC(batch, + batch->wa_render_bo, + I915_GEM_DOMAIN_INSTRUCTION, + I915_GEM_DOMAIN_INSTRUCTION, + 0); + OUT_BATCH(batch, 0); /* write data */ + + /* now finally the _real flush */ + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(batch, CMD_PIPE_CONTROL_WC_FLUSH | CMD_PIPE_CONTROL_TC_FLUSH | CMD_PIPE_CONTROL_NOWRITE); - else + OUT_BATCH(batch, 0); /* write address */ + OUT_BATCH(batch, 0); /* write data */ + ADVANCE_BATCH(batch); + } else { + BEGIN_BATCH(batch, 4); + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(batch, CMD_PIPE_CONTROL_WC_FLUSH | CMD_PIPE_CONTROL_TC_FLUSH | CMD_PIPE_CONTROL_DC_FLUSH | CMD_PIPE_CONTROL_NOWRITE); + OUT_BATCH(batch, 0); /* write address */ + OUT_BATCH(batch, 0); /* write data */ + ADVANCE_BATCH(batch); + } - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - ADVANCE_BATCH(batch); } else { if (batch->flag == I915_EXEC_BLT) { BEGIN_BLT_BATCH(batch, 4); diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h index 70ceddb..34ff66d 100644 --- a/src/intel_batchbuffer.h +++ b/src/intel_batchbuffer.h @@ -24,6 +24,9 @@ struct intel_batchbuffer int (*run)(drm_intel_bo *bo, int used, drm_clip_rect_t *cliprects, int num_cliprects, int DR4, unsigned int ring_flag); + + /* Used for Sandybdrige workaround */ + dri_bo *wa_render_bo; }; struct intel_batchbuffer *intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size); diff --git a/src/intel_driver.c b/src/intel_driver.c index 83542d9..994e64c 100644 --- a/src/intel_driver.c +++ b/src/intel_driver.c @@ -34,6 
+34,7 @@ #include "intel_batchbuffer.h" #include "intel_memman.h" #include "intel_driver.h" +uint32_t g_intel_debug_option_flags = 0; static Bool intel_driver_get_param(struct intel_driver_data *intel, int param, int *value) @@ -67,12 +68,22 @@ static void intel_driver_get_revid(struct intel_driver_data *intel, int *value) return; } +extern const struct intel_device_info *i965_get_device_info(int devid); + bool intel_driver_init(VADriverContextP ctx) { struct intel_driver_data *intel = intel_driver_data(ctx); struct drm_state * const drm_state = (struct drm_state *)ctx->drm_state; - int has_exec2, has_bsd, has_blt; + int has_exec2 = 0, has_bsd = 0, has_blt = 0, has_vebox = 0; + char *env_str = NULL; + + g_intel_debug_option_flags = 0; + if ((env_str = getenv("VA_INTEL_DEBUG"))) + g_intel_debug_option_flags = atoi(env_str); + + if (g_intel_debug_option_flags) + fprintf(stderr, "g_intel_debug_option_flags:%x\n", g_intel_debug_option_flags); assert(drm_state); assert(VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI1) || @@ -91,12 +102,19 @@ intel_driver_init(VADriverContextP ctx) pthread_mutex_init(&intel->ctxmutex, NULL); intel_driver_get_param(intel, I915_PARAM_CHIPSET_ID, &intel->device_id); + intel->device_info = i965_get_device_info(intel->device_id); + + if (!intel->device_info) + return false; + if (intel_driver_get_param(intel, I915_PARAM_HAS_EXECBUF2, &has_exec2)) intel->has_exec2 = has_exec2; if (intel_driver_get_param(intel, I915_PARAM_HAS_BSD, &has_bsd)) intel->has_bsd = has_bsd; if (intel_driver_get_param(intel, I915_PARAM_HAS_BLT, &has_blt)) intel->has_blt = has_blt; + if (intel_driver_get_param(intel, I915_PARAM_HAS_VEBOX, &has_vebox)) + intel->has_vebox = !!has_vebox; intel_driver_get_revid(intel, &intel->revision); intel_memman_init(intel); diff --git a/src/intel_driver.h b/src/intel_driver.h index 9631b96..432a0d9 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -38,6 +38,8 @@ #define XY_COLOR_BLT_WRITE_RGB (1 << 20) #define 
XY_COLOR_BLT_DST_TILED (1 << 11) +#define GEN8_XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x05) + /* BR13 */ #define BR13_8 (0x0 << 24) #define BR13_565 (0x1 << 24) @@ -45,6 +47,7 @@ #define BR13_8888 (0x3 << 24) #define CMD_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | (0 << 16)) +#define CMD_PIPE_CONTROL_CS_STALL (1 << 20) #define CMD_PIPE_CONTROL_NOWRITE (0 << 14) #define CMD_PIPE_CONTROL_WRITE_QWORD (1 << 14) #define CMD_PIPE_CONTROL_WRITE_DEPTH (2 << 14) @@ -57,12 +60,14 @@ #define CMD_PIPE_CONTROL_DC_FLUSH (1 << 5) #define CMD_PIPE_CONTROL_GLOBAL_GTT (1 << 2) #define CMD_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) struct intel_batchbuffer; #define ALIGN(i, n) (((i) + (n) - 1) & ~((n) - 1)) +#define IS_ALIGNED(i, n) (((i) & ((n)-1)) == 0) #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0])) @@ -71,6 +76,18 @@ struct intel_batchbuffer; #define True 1 #define False 0 +extern uint32_t g_intel_debug_option_flags; +#define VA_INTEL_DEBUG_OPTION_ASSERT (1 << 0) +#define VA_INTEL_DEBUG_OPTION_BENCH (1 << 1) + +#define ASSERT_RET(value, fail_ret) do { \ + if (!(value)) { \ + if (g_intel_debug_option_flags & VA_INTEL_DEBUG_OPTION_ASSERT) \ + assert(value); \ + return fail_ret; \ + } \ + } while (0) + #define SET_BLOCKED_SIGSET() do { \ sigset_t bl_mask; \ sigfillset(&bl_mask); \ @@ -104,6 +121,21 @@ struct intel_batchbuffer; } \ } while (0) +struct intel_device_info +{ + int gen; + int gt; + + unsigned int urb_size; + unsigned int max_wm_threads; + + unsigned int is_g4x : 1; /* gen4 */ + unsigned int is_ivybridge : 1; /* gen7 */ + unsigned int is_baytrail : 1; /* gen7 */ + unsigned int is_haswell : 1; /* gen7 */ + unsigned int is_cherryview : 1; /* gen8 */ +}; + struct intel_driver_data { int fd; @@ -121,6 +153,9 @@ struct intel_driver_data unsigned int has_exec2 : 1; /* Flag: has execbuffer2? 
*/ unsigned int has_bsd : 1; /* Flag: has bitstream decoder for H.264? */ unsigned int has_blt : 1; /* Flag: has BLT unit? */ + unsigned int has_vebox : 1; /* Flag: has VEBOX unit */ + + const struct intel_device_info *device_info; }; bool intel_driver_init(VADriverContextP ctx); @@ -145,205 +180,16 @@ struct intel_region dri_bo *bo; }; -#define PCI_CHIP_GM45_GM 0x2A42 -#define PCI_CHIP_IGD_E_G 0x2E02 -#define PCI_CHIP_Q45_G 0x2E12 -#define PCI_CHIP_G45_G 0x2E22 -#define PCI_CHIP_G41_G 0x2E32 -#define PCI_CHIP_B43_G 0x2E42 -#define PCI_CHIP_B43_G1 0x2E92 - -#define PCI_CHIP_IRONLAKE_D_G 0x0042 -#define PCI_CHIP_IRONLAKE_M_G 0x0046 - -#ifndef PCI_CHIP_SANDYBRIDGE_GT1 -#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */ -#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 -#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 -#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */ -#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 -#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 -#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A /* Server */ -#endif - -#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */ -#define PCI_CHIP_IVYBRIDGE_GT2 0x0162 -#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */ -#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 -#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */ -#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a - -#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */ -#define PCI_CHIP_HASWELL_GT2 0x0412 -#define PCI_CHIP_HASWELL_GT3 0x0422 -#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */ -#define PCI_CHIP_HASWELL_M_GT2 0x0416 -#define PCI_CHIP_HASWELL_M_GT3 0x0426 -#define PCI_CHIP_HASWELL_S_GT1 0x040a /* Server */ -#define PCI_CHIP_HASWELL_S_GT2 0x041a -#define PCI_CHIP_HASWELL_S_GT3 0x042a -#define PCI_CHIP_HASWELL_B_GT1 0x040b /* Reserved */ -#define PCI_CHIP_HASWELL_B_GT2 0x041b -#define PCI_CHIP_HASWELL_B_GT3 0x042b -#define PCI_CHIP_HASWELL_E_GT1 0x040e /* Reserved */ -#define PCI_CHIP_HASWELL_E_GT2 0x041e -#define PCI_CHIP_HASWELL_E_GT3 0x042e - -#define PCI_CHIP_HASWELL_SDV_GT1 
0x0c02 /* Desktop */ -#define PCI_CHIP_HASWELL_SDV_GT2 0x0c12 -#define PCI_CHIP_HASWELL_SDV_GT3 0x0c22 -#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0c06 /* Mobile */ -#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0c16 -#define PCI_CHIP_HASWELL_SDV_M_GT3 0x0c26 -#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0c0a /* Server */ -#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0c1a -#define PCI_CHIP_HASWELL_SDV_S_GT3 0x0c2a -#define PCI_CHIP_HASWELL_SDV_B_GT1 0x0c0b /* Reserved */ -#define PCI_CHIP_HASWELL_SDV_B_GT2 0x0c1b -#define PCI_CHIP_HASWELL_SDV_B_GT3 0x0c2b -#define PCI_CHIP_HASWELL_SDV_E_GT1 0x0c0e /* Reserved */ -#define PCI_CHIP_HASWELL_SDV_E_GT2 0x0c1e -#define PCI_CHIP_HASWELL_SDV_E_GT3 0x0c2e - -#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */ -#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12 -#define PCI_CHIP_HASWELL_ULT_GT3 0x0A22 -#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */ -#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 -#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26 -#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */ -#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A -#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A -#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B /* Reserved */ -#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B -#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B -#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E /* Reserved */ -#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E -#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E - -#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */ -#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12 -#define PCI_CHIP_HASWELL_CRW_GT3 0x0D22 -#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */ -#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16 -#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26 -#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */ -#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A -#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A -#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B /* Reserved */ -#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B -#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B -#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E /* 
Reserved */ -#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E -#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E - -#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ - devid == PCI_CHIP_Q45_G || \ - devid == PCI_CHIP_G45_G || \ - devid == PCI_CHIP_G41_G || \ - devid == PCI_CHIP_B43_G || \ - devid == PCI_CHIP_B43_G1) - -#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) -#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) - -#define IS_IRONLAKE_D(devid) (devid == PCI_CHIP_IRONLAKE_D_G) -#define IS_IRONLAKE_M(devid) (devid == PCI_CHIP_IRONLAKE_M_G) -#define IS_IRONLAKE(devid) (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid)) - -#define IS_SNB_GT1(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ - devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ - devid == PCI_CHIP_SANDYBRIDGE_S_GT) - -#define IS_SNB_GT2(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ - devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ - devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ - devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS) - -#define IS_GEN6(devid) (IS_SNB_GT1(devid) || \ - IS_SNB_GT2(devid)) - -#define IS_IVB_GT1(devid) (devid == PCI_CHIP_IVYBRIDGE_GT1 || \ - devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \ - devid == PCI_CHIP_IVYBRIDGE_S_GT1) - -#define IS_IVB_GT2(devid) (devid == PCI_CHIP_IVYBRIDGE_GT2 || \ - devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \ - devid == PCI_CHIP_IVYBRIDGE_S_GT2) - -#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || \ - IS_IVB_GT2(devid)) - -#define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \ - devid == PCI_CHIP_HASWELL_M_GT1 || \ - devid == PCI_CHIP_HASWELL_S_GT1 || \ - devid == PCI_CHIP_HASWELL_B_GT1 || \ - devid == PCI_CHIP_HASWELL_E_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_B_GT1 || \ - devid == PCI_CHIP_HASWELL_SDV_E_GT1 || \ - devid == PCI_CHIP_HASWELL_CRW_GT1 || \ - devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \ - devid == PCI_CHIP_HASWELL_CRW_S_GT1 || \ - devid == 
PCI_CHIP_HASWELL_CRW_B_GT1 || \ - devid == PCI_CHIP_HASWELL_CRW_E_GT1 || \ - devid == PCI_CHIP_HASWELL_ULT_GT1 || \ - devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \ - devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \ - devid == PCI_CHIP_HASWELL_ULT_B_GT1 || \ - devid == PCI_CHIP_HASWELL_ULT_E_GT1) - - -#define IS_HSW_GT2(devid) (devid == PCI_CHIP_HASWELL_GT2|| \ - devid == PCI_CHIP_HASWELL_M_GT2|| \ - devid == PCI_CHIP_HASWELL_S_GT2|| \ - devid == PCI_CHIP_HASWELL_B_GT2 || \ - devid == PCI_CHIP_HASWELL_E_GT2 || \ - devid == PCI_CHIP_HASWELL_SDV_GT2|| \ - devid == PCI_CHIP_HASWELL_SDV_M_GT2|| \ - devid == PCI_CHIP_HASWELL_SDV_S_GT2|| \ - devid == PCI_CHIP_HASWELL_SDV_B_GT2 || \ - devid == PCI_CHIP_HASWELL_SDV_E_GT2 || \ - devid == PCI_CHIP_HASWELL_CRW_GT2|| \ - devid == PCI_CHIP_HASWELL_CRW_M_GT2|| \ - devid == PCI_CHIP_HASWELL_CRW_S_GT2|| \ - devid == PCI_CHIP_HASWELL_CRW_B_GT2|| \ - devid == PCI_CHIP_HASWELL_CRW_E_GT2|| \ - devid == PCI_CHIP_HASWELL_ULT_GT2|| \ - devid == PCI_CHIP_HASWELL_ULT_M_GT2|| \ - devid == PCI_CHIP_HASWELL_ULT_S_GT2|| \ - devid == PCI_CHIP_HASWELL_ULT_B_GT2 || \ - devid == PCI_CHIP_HASWELL_ULT_E_GT2) - - -#define IS_HSW_GT3(devid) (devid == PCI_CHIP_HASWELL_GT3 || \ - devid == PCI_CHIP_HASWELL_M_GT3 || \ - devid == PCI_CHIP_HASWELL_S_GT3 || \ - devid == PCI_CHIP_HASWELL_B_GT3 || \ - devid == PCI_CHIP_HASWELL_E_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_M_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_S_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_B_GT3 || \ - devid == PCI_CHIP_HASWELL_SDV_E_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_M_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_S_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_B_GT3 || \ - devid == PCI_CHIP_HASWELL_CRW_E_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_M_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_S_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_B_GT3 || \ - devid == PCI_CHIP_HASWELL_ULT_E_GT3) - -#define 
IS_HASWELL(devid) (IS_HSW_GT1(devid) || \ - IS_HSW_GT2(devid) || \ - IS_HSW_GT3(devid)) - -#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \ - IS_HASWELL(devid)) +#define IS_G4X(device_info) (device_info->is_g4x) + +#define IS_IRONLAKE(device_info) (device_info->gen == 5) + +#define IS_GEN6(device_info) (device_info->gen == 6) + +#define IS_HASWELL(device_info) (device_info->is_haswell) +#define IS_GEN7(device_info) (device_info->gen == 7) + +#define IS_CHERRYVIEW(device_info) (device_info->is_cherryview) +#define IS_GEN8(device_info) (device_info->gen == 8) #endif /* _INTEL_DRIVER_H_ */ diff --git a/src/intel_media.h b/src/intel_media.h index b30740a..55136d6 100644 --- a/src/intel_media.h +++ b/src/intel_media.h @@ -39,6 +39,7 @@ struct gen_avc_surface dri_bo *dmv_top; dri_bo *dmv_bottom; int dmv_bottom_flag; + int frame_store_id; /* only used for H.264 on earlier generations (<HSW) */ }; extern void gen_free_avc_surface(void **data); diff --git a/src/intel_version.h.in b/src/intel_version.h.in new file mode 100644 index 0000000..050e834 --- /dev/null +++ b/src/intel_version.h.in @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef INTEL_VERSION_H +#define INTEL_VERSION_H + +/** + * INTEL_DRIVER_GIT_VERSION: + * + * The full version identifier of libva-intel-driver, from a git + * repository, in string form (suitable for string concatenation). + */ +#define INTEL_DRIVER_GIT_VERSION "@INTEL_DRIVER_GIT_VERSION@" + +#endif /* INTEL_VERSION_H */ diff --git a/src/shaders/post_processing/Makefile.am b/src/shaders/post_processing/Makefile.am index b19020f..0f5c2bf 100644 --- a/src/shaders/post_processing/Makefile.am +++ b/src/shaders/post_processing/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = gen5_6 gen7 +SUBDIRS = gen5_6 gen7 gen75 gen8 # Extra clean files so that maintainer-clean removes *everything* MAINTAINERCLEANFILES = Makefile.in diff --git a/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm index 280d37a..23bd306 100644 --- a/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm +++ b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm @@ -62,24 +62,29 @@ #include "DI_Hist_Save.asm" ////////////////////////////////////// Save the DN Curr Frame for Next Run //////////////////////// - add (4) pCF_Y_OFFSET<1>:uw ubSRC_CF_OFFSET<4;4,1>:ub npDN_YUV:w - // check top/bottom field first - cmp.e.f0.0 (1) null<1>:w ubTFLD_FIRST<0;1,0>:ub 1:w - (f0.0) jmpi (1) TOP_FIELD_FIRST - -BOTTOM_FIELD_FIRST: - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (4) 
mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 0,2) - mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1> // 1st field luma from current frame (line 1,3) - } - jmpi (1) SAVE_DN_CURR - -TOP_FIELD_FIRST: - $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) { - mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1> // 2nd field luma from current frame (line 0,2) - mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 1st field luma from current frame (line 1,3) + // previous frame + $for (0; <nY_NUM_OF_ROWS/2; 1) { + mov (16) mubMSGHDR_DN(1, %1*16)<1> ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16) } -SAVE_DN_CURR: + + mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin + mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (16x4) + mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud + send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud + + //Write UV through DATAPORT + mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin + asr (1) rMSGSRC.1<1>:d rMSGSRC.1<0;1,0>:d 1:w // U/V block origin should be half of Y's + mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_UV:ud // block width and height (16x2) + mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud + + mov (8) mubMSGHDR_DN(1, 0)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET, 1)<16 ;8,2> + mov (8) mubMSGHDR_DN(1, 1)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET, 16)<16 ;8,2> + mov (8) mubMSGHDR_DN(1, 16)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET+1, 1)<16 ;8,2> + mov (8) mubMSGHDR_DN(1, 17)<2> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET+1, 16)<16 ;8,2> + send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud + + // current frame $for (0; <nY_NUM_OF_ROWS/2; 1) { mov (16) mubMSGHDR_DN(1, %1*16)<1> ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16) } @@ -87,7 +92,7 @@ SAVE_DN_CURR: mov (2) 
rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (16x4) mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud - send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud + send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_1_Y:ud //Write UV through DATAPORT mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin @@ -99,4 +104,4 @@ SAVE_DN_CURR: mov (8) mubMSGHDR_DN(1, 1)<2> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET, 16)<16 ;8,2> mov (8) mubMSGHDR_DN(1, 16)<2> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+1, 1)<16 ;8,2> mov (8) mubMSGHDR_DN(1, 17)<2> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET+1, 16)<16 ;8,2> - send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud
\ No newline at end of file + send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_1_UV:ud diff --git a/src/shaders/post_processing/gen5_6/Makefile.am b/src/shaders/post_processing/gen5_6/Makefile.am index bb8caa4..4a13d9d 100755 --- a/src/shaders/post_processing/gen5_6/Makefile.am +++ b/src/shaders/post_processing/gen5_6/Makefile.am @@ -16,6 +16,7 @@ INTEL_PP_G4B_GEN5 = \ nv12_load_save_pl3.g4b.gen5 \ nv12_load_save_rgbx.g4b.gen5 \ nv12_scaling_nv12.g4b.gen5 \ + pa_load_save_pa.g4b.gen5 \ pa_load_save_nv12.g4b.gen5 \ pa_load_save_pl3.g4b.gen5 \ pl3_load_save_nv12.g4b.gen5 \ @@ -33,6 +34,7 @@ INTEL_PP_G6B = \ nv12_load_save_pl3.g6b \ nv12_load_save_rgbx.g6b \ nv12_scaling_nv12.g6b \ + pa_load_save_pa.g6b \ pa_load_save_nv12.g6b \ pa_load_save_pl3.g6b \ pl3_load_save_nv12.g6b \ @@ -50,6 +52,7 @@ INTEL_PP_ASM = \ nv12_load_save_pl3.asm \ nv12_load_save_rgbx.asm \ nv12_scaling_nv12.asm \ + pa_load_save_pa.asm \ pa_load_save_nv12.asm \ pa_load_save_pl3.asm \ pl3_load_save_nv12.asm \ @@ -178,7 +181,7 @@ endif all-local: $(TARGETS) -SUFFIXES = .g4a .g4b .g6a .g6b .g5s .g6s .asm +SUFFIXES = .g4a .g4b .g4b.gen5 .g6a .g6b .g5s .g6s .asm if HAVE_GEN4ASM .g4a.g4b: diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 index 446fb4b..4563d20 100644 --- a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 +++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 @@ -44,18 +44,23 @@ { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 }, { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 }, { 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 }, - { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 }, - { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 }, - { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a }, - { 0x00400001, 0x20400022, 0x00690580, 0x00000000 }, - { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 }, - { 0x00400001, 0x20600022, 0x00690590, 
0x00000000 }, - { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 }, - { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 }, - { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 }, - { 0x00400001, 0x20500022, 0x00690580, 0x00000000 }, - { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 }, - { 0x00400001, 0x20700022, 0x00690590, 0x00000000 }, + { 0x00800001, 0x20400232, 0x00b10440, 0x00000000 }, + { 0x00800001, 0x20500232, 0x00b10450, 0x00000000 }, + { 0x00800001, 0x20600232, 0x00b10460, 0x00000000 }, + { 0x00800001, 0x20700232, 0x00b10470, 0x00000000 }, + { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0003000f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 }, + { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, + { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0001000f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00600001, 0x40400232, 0x00ae0481, 0x00000000 }, + { 0x00600001, 0x40410232, 0x00ae0490, 0x00000000 }, + { 0x00600001, 0x40500232, 0x00ae04a1, 0x00000000 }, + { 0x00600001, 0x40510232, 0x00ae04b0, 0x00000000 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 }, { 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 }, { 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 }, { 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 }, @@ -63,7 +68,7 @@ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, { 0x00000001, 0x21080061, 0x00000000, 0x0003000f }, { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, - { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x0608200a }, { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 }, { 0x00000001, 0x21080061, 0x00000000, 0x0001000f }, @@ -72,7 +77,7 @@ { 0x00600001, 0x40410232, 0x00ae0510, 0x00000000 }, { 0x00600001, 0x40500232, 0x00ae0521, 0x00000000 }, { 0x00600001, 0x40510232, 
0x00ae0530, 0x00000000 }, - { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x0408200b }, { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff }, { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 }, { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 }, @@ -81,10 +86,10 @@ { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 }, { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 }, { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 }, - { 0x00010220, 0x34001c00, 0x02001400, 0xffffff64 }, + { 0x00010220, 0x34001c00, 0x02001400, 0xffffff5a }, { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 }, { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 }, - { 0x00000220, 0x34001c00, 0x00001400, 0xffffff5e }, + { 0x00000220, 0x34001c00, 0x00001400, 0xffffff54 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 }, diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b index 111d483..8d6ebe3 100644 --- a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b +++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b @@ -117,18 +117,23 @@ { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 }, { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 }, { 0x05600031, 0x20000cc4, 0x000001a0, 0x04094014 }, - { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 }, - { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 }, - { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a }, - { 0x00400001, 0x20400022, 0x00690580, 0x00000000 }, - { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 }, - { 0x00400001, 0x20600022, 0x00690590, 0x00000000 }, - { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 }, - { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 }, - { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 }, - { 0x00400001, 0x20500022, 0x00690580, 0x00000000 }, - { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 }, - { 
0x00400001, 0x20700022, 0x00690590, 0x00000000 }, + { 0x00800001, 0x20400232, 0x00b10440, 0x00000000 }, + { 0x00800001, 0x20500232, 0x00b10450, 0x00000000 }, + { 0x00800001, 0x20600232, 0x00b10460, 0x00000000 }, + { 0x00800001, 0x20700232, 0x00b10470, 0x00000000 }, + { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0003000f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 }, + { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, + { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0001000f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00600001, 0x40400232, 0x00ae0481, 0x00000000 }, + { 0x00600001, 0x40410232, 0x00ae0490, 0x00000000 }, + { 0x00600001, 0x40500232, 0x00ae04a1, 0x00000000 }, + { 0x00600001, 0x40510232, 0x00ae04b0, 0x00000000 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 }, { 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 }, { 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 }, { 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 }, @@ -136,7 +141,7 @@ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, { 0x00000001, 0x21080061, 0x00000000, 0x0003000f }, { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, - { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x0609400a }, { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 }, { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 }, { 0x00000001, 0x21080061, 0x00000000, 0x0001000f }, @@ -145,7 +150,7 @@ { 0x00600001, 0x40410232, 0x00ae0510, 0x00000000 }, { 0x00600001, 0x40500232, 0x00ae0521, 0x00000000 }, { 0x00600001, 0x40510232, 0x00ae0530, 0x00000000 }, - { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x0409400b }, { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff }, { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 }, { 0x05000010, 0x200035ac, 0x020000a0, 
0x00000084 }, @@ -154,10 +159,10 @@ { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 }, { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 }, { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 }, - { 0x00010220, 0x34001c00, 0x02001400, 0xffffff64 }, + { 0x00010220, 0x34001c00, 0x02001400, 0xffffff5a }, { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 }, { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 }, - { 0x00000220, 0x34001c00, 0x00001400, 0xffffff5e }, + { 0x00000220, 0x34001c00, 0x00001400, 0xffffff54 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pa.asm b/src/shaders/post_processing/gen5_6/pa_load_save_pa.asm new file mode 100644 index 0000000..72c2a8a --- /dev/null +++ b/src/shaders/post_processing/gen5_6/pa_load_save_pa.asm @@ -0,0 +1,17 @@ +// Module name: PA_LOAD_SAVE_PA +.kernel PA_LOAD_SAVE_PA +.code + +#include "SetupVPKernel.asm" +#include "Multiple_Loop_Head.asm" +#include "PA_Load_8x8.asm" +#include "PL8x8_Save_PA.asm" +#include "Multiple_Loop.asm" + +END_THREAD // End of Thread + +.end_code + +.end_kernel + +// end of pa_load_save_pa.asm diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5 b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5 new file mode 100644 index 0000000..a75c75a --- /dev/null +++ b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5 @@ -0,0 +1,115 @@ + { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 }, + { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 }, + { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 }, + { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 }, + { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 }, + { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 }, + { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 }, + { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 }, + { 0x00000001, 0x21080061, 0x00000000, 
0x0007001f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x01600031, 0x27000c01, 0x408d0000, 0x0288a001 }, + { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 }, + { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 }, + { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 }, + { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 }, + { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 }, + { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 }, + { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 }, + { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 }, + { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 }, + { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 }, + { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 }, + { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 }, + { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 }, + { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 }, + { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 }, + { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 }, + { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 }, + { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 }, + { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 }, + { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 }, + { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 }, + { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 }, + { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 }, + { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 }, + { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 }, + { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 }, + { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 }, + { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 }, + { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 }, + { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 }, + { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 }, + { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 }, + { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 }, + { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 }, + { 0x00600001, 0xf4000231, 0x00ae0240, 
0x00000000 }, + { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 }, + { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 }, + { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 }, + { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 }, + { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 }, + { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 }, + { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 }, + { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 }, + { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 }, + { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 }, + { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 }, + { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 }, + { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 }, + { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 }, + { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 }, + { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 }, + { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 }, + { 0x00000801, 0x21080061, 0x00000000, 0x0007001f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff }, + { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff }, + { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a }, + { 0x01600031, 0x21400c01, 0x408d0000, 0x0288a007 }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 }, + { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 }, + { 0x00710001, 0x24400169, 0x00000000, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000440, 0x00000000 }, + { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000442, 0x00000000 }, + { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000444, 0x00000000 }, + { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000446, 0x00000000 }, + { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000448, 0x00000000 }, + { 0x00910001, 0x27800129, 0x02b101c0, 
0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 }, + { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 }, + { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 }, + { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 }, + { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 }, + { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 }, + { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 }, + { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 }, + { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 }, + { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 }, + { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 }, + { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 }, + { 0x01600031, 0x20000c04, 0x508d0000, 0x12082007 }, + { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff }, + { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 }, + { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 }, + { 0x00010220, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x00000001, 0x20b80129, 0x000000c8, 0x00000000 }, + { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 }, + { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 }, + { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 }, + { 0x00010220, 0x34001c00, 0x02001400, 0xffffff36 }, + { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 }, + { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 }, + { 0x00000220, 0x34001c00, 0x00001400, 0xffffff30 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, + { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 }, + { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, + { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 }, diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b new file mode 100644 index 0000000..5d9fe48 --- /dev/null +++ b/src/shaders/post_processing/gen5_6/pa_load_save_pa.g6b @@ -0,0 
+1,188 @@ + { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25600061, 0x00000000, 
0x00000000 }, + { 0x00600001, 0x25800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29200061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21000021, 0x008d0000, 
0x00000000 }, + { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 }, + { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 }, + { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 }, + { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 }, + { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 }, + { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 }, + { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 }, + { 0x00000001, 0x21080061, 0x00000000, 0x0007001f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x04600031, 0x27000cc1, 0x00000020, 0x02898001 }, + { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 }, + { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 }, + { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 }, + { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 }, + { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 }, + { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 }, + { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 }, + { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 }, + { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 }, + { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 }, + { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 }, + { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 }, + { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 }, + { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 }, + { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 }, + { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 }, + { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 }, + { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 }, + { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 }, + { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 }, + { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 }, + { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 }, + { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 }, + { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 }, + { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 }, + { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 }, + { 0x00800001, 0xd0000231, 0x00d20140, 
0x00000000 }, + { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 }, + { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 }, + { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 }, + { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 }, + { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 }, + { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 }, + { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 }, + { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 }, + { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 }, + { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 }, + { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 }, + { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 }, + { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 }, + { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 }, + { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 }, + { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 }, + { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 }, + { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 }, + { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 }, + { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 }, + { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 }, + { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 }, + { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 }, + { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 }, + { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 }, + { 0x00000801, 0x21080061, 0x00000000, 0x0007001f }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff }, + { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff }, + { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a }, + { 0x04600031, 0x21400cc1, 0x00000020, 0x02898007 }, + { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 }, + { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 }, + { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 }, + { 0x00710001, 0x24400169, 0x00000000, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000440, 0x00000000 }, + { 0x00910001, 0x27000129, 0x02b10140, 
0x00000000 }, + { 0x00000001, 0x26020128, 0x00000442, 0x00000000 }, + { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000444, 0x00000000 }, + { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000446, 0x00000000 }, + { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x00000448, 0x00000000 }, + { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 }, + { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 }, + { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 }, + { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 }, + { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 }, + { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 }, + { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 }, + { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 }, + { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 }, + { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 }, + { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 }, + { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 }, + { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 }, + { 0x05600031, 0x20000cc4, 0x00000020, 0x12094007 }, + { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff }, + { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 }, + { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 }, + { 0x00010220, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x00000001, 0x20b80129, 0x000000c8, 0x00000000 }, + { 0x01000010, 0x20003dac, 0x00000086, 0x00010001 }, + { 0x00010001, 0x20b80129, 0x000000c4, 0x00000000 }, + { 0x00010001, 0x20ba0231, 0x000000c6, 0x00000000 }, + { 0x00010220, 0x34001c00, 0x02001400, 0xffffff36 }, + { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 }, + { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 }, + { 0x00000220, 0x34001c00, 0x00001400, 0xffffff30 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00022, 0x008d0000, 
0x00000000 }, + { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 }, + { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen7/Makefile.am b/src/shaders/post_processing/gen7/Makefile.am index f1a1c60..f4e2a8d 100644 --- a/src/shaders/post_processing/gen7/Makefile.am +++ b/src/shaders/post_processing/gen7/Makefile.am @@ -4,6 +4,7 @@ INTEL_PP_G7B = \ nv12_dn_nv12.g7b \ pa_to_pl2.g7b \ pa_to_pl3.g7b \ + pa_to_pa.g7b \ pl2_to_pa.g7b \ pl2_to_pl2.g7b \ pl2_to_pl3.g7b \ @@ -81,16 +82,18 @@ all-local: $(TARGETS) SUFFIXES = .g7b .g7s .asm +if HAVE_GEN4ASM $(INTEL_PP_GEN7_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G4A) .asm.g7s: $(AM_V_GEN)cpp $< > _pp0.$@; \ ../../gpp.py _pp0.$@ $@; \ rm _pp0.$@ .g7s.g7b: - $(AM_V_GEN)intel-gen4asm -a -o $@ -g 7 $< + $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 7 $< .g7s.g75b: - $(AM_V_GEN)intel-gen4asm -a -o $@ -g 7.5 $< + $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 7.5 $< +endif CLEANFILES = $(INTEL_PP_GEN7_ASM) diff --git a/src/shaders/post_processing/gen7/pa_to_pa.asm b/src/shaders/post_processing/gen7/pa_to_pa.asm new file mode 100644 index 0000000..62f14bd --- /dev/null +++ b/src/shaders/post_processing/gen7/pa_to_pa.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PA_TO_PL3 +.code + +#include "VP_Setup.g4a" +#include "Set_Layer_0.g4a" +#include "Set_AVS_Buf_0123_VYUA.g4a" +#include "PA_AVS_Buf_0.g4a" +#include "PA_AVS_Buf_1.g4a" +#include "PA_AVS_Buf_2.g4a" +#include "PA_AVS_Buf_3.g4a" +#include "Save_AVS_PA.g4a" +#include "EOT.g4a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen7/pa_to_pa.g75b b/src/shaders/post_processing/gen7/pa_to_pa.g75b new file mode 100644 index 0000000..0ccd59e --- /dev/null +++ b/src/shaders/post_processing/gen7/pa_to_pa.g75b @@ -0,0 +1,677 @@ + { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 }, + { 0x00600041, 
0x206077bd, 0x008d0060, 0x000000f0 }, + { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 }, + { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 
0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 }, + { 0x00000220, 
0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 
0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 
0x20cc77bd, 0x0000006c, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 
0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 
0x20b477bd, 0x00000094, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 
0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x00110001, 
0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff }, + { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 }, + { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 }, + { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 }, + { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 }, + { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 }, + { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 }, + { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 }, + { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 }, + { 0x00000001, 
0x240803bc, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 }, + { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 }, + { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 }, + { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 }, + { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 }, + { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea }, + { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 }, + { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 }, + { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 }, + { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 }, + { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 }, + { 0x00000801, 0x22500061, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000240 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 }, + { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 
0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x28000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 
0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 
0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 }, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 
0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 }, + { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 }, + { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 }, + { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 }, + { 0x00000801, 0x23680061, 0x00000000, 0x0001001f }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 
0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 
0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 
0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 }, + { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 
0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 
0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen7/pa_to_pa.g7b b/src/shaders/post_processing/gen7/pa_to_pa.g7b new file mode 100644 index 0000000..20728b5 --- /dev/null +++ b/src/shaders/post_processing/gen7/pa_to_pa.g7b @@ -0,0 +1,677 @@ + { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 }, + { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 }, + { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 
}, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 }, + { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 
}, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 }, + { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 }, + { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 }, + { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 
}, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 }, + { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 }, + { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 }, + { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 
}, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 }, + { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 }, + { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 }, + { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 
}, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 }, + { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 }, + { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 }, + { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 
}, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 }, + { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 }, + { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 }, + { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c 
}, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 }, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 }, + { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 }, + { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 
}, + { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e }, + { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e }, + { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 }, + { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 }, + { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 }, + { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 }, + { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 }, + { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 }, + { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 }, + { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 
}, + { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 }, + { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 }, + { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 }, + { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff }, + { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 }, + { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a }, + { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 }, + { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 }, + { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 }, + { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 }, + { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 }, + { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 }, + { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 }, + { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 }, + { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 }, + { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 }, + { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea }, + { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 }, + { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 }, + { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 }, + { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 }, + { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 }, + { 0x00000801, 0x22500061, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000048 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 }, + { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 
}, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x28000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 
}, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 
}, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 }, + { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 }, + { 0x00000001, 0x22080061, 0x00000000, 0x00000000 }, + { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 }, + { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 
}, + { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 }, + { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 }, + { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 }, + { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 }, + { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 }, + { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 }, + { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 }, + { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 }, + { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 }, + { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 }, + { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 }, + { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 }, + { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 }, + { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 }, + { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 }, + { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 }, + { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 }, + { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 }, + { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 }, + { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff }, + { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 }, + { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 }, + { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff }, + { 0x00000001, 0x26000168, 0x00000000, 0x00000000 }, + { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c }, + { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 }, + { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 }, + { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 }, + { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 }, + { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 }, + { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 }, + { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 }, + { 0x00000801, 0x23680061, 0x00000000, 0x0001001f 
}, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 
}, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 
}, + { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 }, + { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 }, + { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 }, + { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 }, + { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 }, + { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 }, + { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 }, + { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 }, + { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 }, + { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 }, + { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 }, + { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 }, + { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 }, + { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 }, + { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 }, + { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 }, + { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 }, + { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 }, + { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 }, + { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 }, + { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 }, + { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 }, + { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 }, + { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 }, + { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 }, + { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 
}, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 
}, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 }, + { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c }, + { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e }, + { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 }, + { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 }, + { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 }, + { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 }, + { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 }, + { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 }, + { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 }, + { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 }, + { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 }, + { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 }, + { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 }, + { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 }, + { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen75/Makefile.am b/src/shaders/post_processing/gen75/Makefile.am new file mode 100644 index 0000000..cdad1d9 --- /dev/null +++ b/src/shaders/post_processing/gen75/Makefile.am @@ -0,0 +1,9 @@ +INTEL_PP_PRE_G75B = \ + sharpening_h_blur.g75b \ + sharpening_unmask.g75b \ + sharpening_v_blur.g75b + +EXTRA_DIST = $(INTEL_PP_PRE_G75B) + +# Extra clean files so that maintainer-clean removes *everything* +MAINTAINERCLEANFILES = Makefile.in diff --git a/src/shaders/post_processing/gen8/EOT.g8a b/src/shaders/post_processing/gen8/EOT.g8a new file mode 100644 index 0000000..72c3da3 --- /dev/null +++ b/src/shaders/post_processing/gen8/EOT.g8a @@ -0,0 +1,166 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights 
Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 2 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//End of Thread message + +mov (8) r127<1>:ud r0.0<8;8,1>:ud + send (1) null<1>:d r127 0x27 0x02000010 diff --git a/src/shaders/post_processing/gen8/Makefile.am b/src/shaders/post_processing/gen8/Makefile.am new file mode 100644 index 0000000..54533fc --- /dev/null +++ b/src/shaders/post_processing/gen8/Makefile.am @@ -0,0 +1,79 @@ +INTEL_PP_G8B = \ + pl2_to_pl2.g8b \ + pl2_to_pl3.g8b \ + pl3_to_pl2.g8b \ + pl3_to_pl3.g8b \ + pl2_to_rgbx.g8b \ + rgbx_to_nv12.g8b \ + pl2_to_pa.g8b \ + pl3_to_pa.g8b \ + pa_to_pl2.g8b \ + pa_to_pl3.g8b \ + pa_to_pa.g8b \ + $(NULL) + +INTEL_PP_PRE_G8B = \ + sharpening_h_blur.g8b \ + sharpening_unmask.g8b \ + sharpening_v_blur.g8b + +INTEL_PP_G8A = \ + EOT.g8a \ + PL2_AVS_Buf_0.g8a \ + PL2_AVS_Buf_1.g8a \ + PL2_AVS_Buf_2.g8a \ + PL2_AVS_Buf_3.g8a \ + PL3_AVS_Buf_0.g8a \ + PL3_AVS_Buf_1.g8a \ + PL3_AVS_Buf_2.g8a \ + PL3_AVS_Buf_3.g8a \ + PA_AVS_Buf_0.g8a \ + PA_AVS_Buf_1.g8a \ + PA_AVS_Buf_2.g8a \ + PA_AVS_Buf_3.g8a \ + Save_AVS_NV12.g8a \ + Save_AVS_PL3.g8a \ + Save_AVS_RGBX.g8a \ + Save_AVS_PA.g8a \ + Set_AVS_Buf_0123_PL2.g8a \ + Set_AVS_Buf_0123_PL3.g8a \ + Set_AVS_Buf_0123_BGRA.g8a \ + Set_AVS_Buf_0123_VYUA.g8a \ + YUV_to_RGB.g8a \ + RGB_to_YUV.g8a \ + Set_Layer_0.g8a \ + VP_Setup.g8a \ + $(NULL) + +INTEL_PP_ASM = $(INTEL_PP_G8B:%.g8b=%.asm) +INTEL_PP_GEN8_ASM = 
$(INTEL_PP_G8B:%.g8b=%.g8s) + +TARGETS = +if HAVE_GEN4ASM +TARGETS += $(INTEL_PP_G8B) +endif + +all-local: $(TARGETS) + +SUFFIXES = .g8b .g8s .asm + +if HAVE_GEN4ASM +$(INTEL_PP_GEN8_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G8A) +.asm.g8s: + $(AM_V_GEN)cpp $< > _pp0.$@; \ + ../../gpp.py _pp0.$@ $@; \ + rm _pp0.$@ +.g8s.g8b: + $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 8 $< +endif + +CLEANFILES = $(INTEL_PP_GEN8_ASM) + +EXTRA_DIST = \ + $(INTEL_PP_ASM) \ + $(INTEL_PP_G8A) \ + $(INTEL_PP_G8B) \ + $(INTEL_PP_PRE_G8B) + +# Extra clean files so that maintainer-clean removes *everything* +MAINTAINERCLEANFILES = Makefile.in diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a new file mode 100644 index 0000000..228b256 --- /dev/null +++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_0.g8a @@ -0,0 +1,457 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 0:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud + // Returns RGBA data in 16 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a new file mode 100644 index 0000000..c93806d --- /dev/null +++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_1.g8a @@ -0,0 +1,457 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 1:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud + // Returns RGBA data in 16 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a new file mode 100644 index 0000000..2cfc90c --- /dev/null +++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_2.g8a @@ -0,0 +1,457 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 2:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud + // Returns RGBA data in 16 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a new file mode 100644 index 0000000..0cbc4ba --- /dev/null +++ b/src/shaders/post_processing/gen8/PA_AVS_Buf_3.g8a @@ -0,0 +1,457 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x50EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x00000000:ud // Enable ARGB channels + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 3:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud + // Returns RGBA data in 16 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a new file mode 100644 index 0000000..bbff22c --- /dev/null +++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_0.g8a @@ -0,0 +1,462 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc: sample_8x8, msg len 2, resp len 4 GRFs, header present + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only: write mask bits 15:12 = 1101 (0 = written back) + + + + // set the vertical block number + + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc; 1 is added to change BI to UV; resp len 8 GRFs + mov (1) r16.2<1>:ud 0x0000A000:ud // Enable Red+Blue (V+U) channels: write mask bits 15:12 = 1010 + + send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud + // Returns UV data in 8 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a new file mode 100644 index 0000000..e916576 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_1.g8a @@ -0,0 +1,458 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 42 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_1.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 1 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc: sample_8x8, msg len 2, resp len 4 GRFs, header present + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only: write mask bits 15:12 = 1101 (0 = written back) + + + // set the vertical block number + + mov (1) r25.1<1>:ud 1:ud + + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc; 1 is added to change BI to UV; resp len 8 GRFs + mov (1) r16.2<1>:ud 0x0000A000:ud // Enable Red+Blue (V+U) channels: write mask bits 15:12 = 1010 + + send (1) uwBUFFER_1(4)<1> r16 0x2 a0.0:ud + // Returns UV data in 8 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_1_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a new file mode 100644 index 0000000..ed51a19 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_2.g8a @@ 
-0,0 +1,458 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 42 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_2.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 2 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel (bit 13 clear; 0 = written back) + + + // set the vertical block number + + + mov (1) r25.1<1>:ud 2:ud + + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc; 1 is added to change BI to UV + mov (1) r16.2<1>:ud 0x0000A000:ud
// Enable Red+Blue channel + + send (1) uwBUFFER_2(4)<1> r16 0x2 a0.0:ud + // Returns UV data in 8 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_2_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a new file mode 100644 index 0000000..5b46bf7 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL2_AVS_Buf_3.g8a @@ -0,0 +1,460 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 42 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_3.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 3 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel (bit 13 clear; 0 = written back) + + + // set the vertical block number + + + mov (1) r25.1<1>:ud 3:ud + + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x48EB001:ud // msg desc; 1 is added to change BI to UV + mov (1) r16.2<1>:ud
0x0000A000:ud // Enable Red+Blue channel + + send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud + // Returns UV data in 8 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_3_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a new file mode 100644 index 0000000..b5b85d5 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_0.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 
13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. + + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel (bit 13 clear; 0 = written back) + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 0:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to change BI to U + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to change BI to V + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_0(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a new file mode 100644 index 0000000..8457ae1 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_1.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only: write-mask bit 13 is clear, bits 15/14/12 are set (1 = not written back) + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 1:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to change BI to U + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_1(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to change BI to V + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_1(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a new file mode 100644 index 0000000..99b40fe --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_2.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only: write-mask bit 13 is clear, bits 15/14/12 are set (1 = not written back) + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 2:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to change BI to U + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_2(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to change BI to V + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_2(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a b/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a new file mode 100644 index 0000000..8659876 --- /dev/null +++ b/src/shaders/post_processing/gen8/PL3_AVS_Buf_3.g8a @@ -0,0 +1,470 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// 44 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message 
descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: PL2_AVS_Buf_0.asm +// Author: Tatiya, Rupesh +// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 + + + +// FileName : PL2_AVS_Buf.asm +// Author : Tatiya, Rupesh +// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N + + + +// Module name: Scaling.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + + // Message Header + // m0.7 31:0 Debug + // m0.6 31:0 Debug + // m0.5 31:0 Ignored + // m0.4 31:0 Ignored + // m0.3 31:0 Ignored + // m0.2 31:16 Ignored + // 15 Alpha Write Channel Mask enable=0, disable=1 + // 14 Blue Write Channel Mask (U) + // 13 Green Write Channel Mask (Y) + // 12 Red Write Channel Mask (V) + // 11:0 Ignored + // m0.1 Ignored + // m0.0 Ignored + + + // AVS payload + // m1.7 Group ID Number + // m1.6 U 2nd Derivative ---> NLAS dx + // m1.5 Delta V ---> Step Y + // m1.4 Delta U ---> Step X + // m1.3 Pixel 0 V Address ---> ORIY (Y0) + // m1.2 Pixel 0 U Address ---> ORIX (X0) + // m1.1 Vertical Block Number + // m1.0 Reserved + + // Sampler Message Descriptor + // 31:29 Reserved 000 + // 28:25 Message length 0010 + // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) + // 19 Header Present 1 + // 18:17 SIMD Mode 11 ---> SIMD32/64 + // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) + // 11:8 Sampler Index xxxx + // 7:0 Binding Table Index xxxxxxxx + + + // Msg Header M0.2 + // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back + // 14:14 Blue Write Channel Mask + // 13:13 Green Write Channel Mask + // 12:12 Red Write Channel Mask + + +//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 + + +//used to generate LABELS at compile time. + + + // 18:17 SIMD Mode 10 ---> SIMD16 + // 16:12 Message Type xxxxx ---> 00000 (SIMD16) + + +//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels) +//r18-19 - 2 GRFs to store sampler ramp. 
+ + .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub + + + .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub + .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + + // Sampler ramp is used for Scaling 0X_0.34X + .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements + + + //#define rMSGDSC_UV r23.0 + + +//End of _SCALING_ + + + //NOTE: We need offsets for second halfof LAYER 0 - even if we do not load it. + //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. 
+ mov (1) r22.4<1>:ud 0x400040:ud + + + mov (1) r16.3<1>:ud r0.3<0;1,0>:ud + + + //AVS_PAYLOAD already has all the data loaded at this point + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud //msg desc + + mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Red channel + + + + // set the vertical block number + + mov (1) r25.1<1>:ud 3:ud + + mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs + + send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud + // Returns Y data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to change BI to UV + + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud + // Returns U data in 4 GRFs in scrambled order + + add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 1 is added to change BI to UV + mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel + + send (1) uwBUFFER_3(8)<1> r16 0x2 a0.0:ud + // Returns V data in 4 GRFs in scrambled order + +SKIP_AVS_LOAD_L0_0_: + nop + + diff --git a/src/shaders/post_processing/gen8/RGB_to_YUV.g8a b/src/shaders/post_processing/gen8/RGB_to_YUV.g8a new file mode 100644 index 0000000..2cda31e --- /dev/null +++ b/src/shaders/post_processing/gen8/RGB_to_YUV.g8a @@ -0,0 +1,910 @@ +/* + * Copyright 2000-2013 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + */ + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + 
+//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: YUV_to_RGB.asm +// +// Convert YUV to RGB, handle it by 16x4 block +// + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare bBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub 
+.declare bBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +//Pointer to mask reg + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + +//Msg payload buffers; upto 4 full-size messages can be written + +//Unnecessary to use the MSGPayLoad, So it is temporiarily used for conversion of YUV->RGB + +.declare fBUFFER_R Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_G Base=r30.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_B Base=r32.0 ElementSize=4 SrcRegion=<8;8,1> Type=f + +.declare fBUFFER_Y Base=r36.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_U Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_V Base=r40.0 ElementSize=4 SrcRegion=<8;8,1> Type=f + +.declare wTempY Base=r42.0 ElementSize=2 Type=w +.declare wTempU Base=r44.0 ElementSize=2 Type=w +.declare wTempV Base=r46.0 ElementSize=2 Type=w + +.declare ubTempY Base=r42.0 ElementSize=1 Type=ub +.declare ubTempU Base=r44.0 ElementSize=1 Type=ub +.declare ubTempV Base=r46.0 ElementSize=1 Type=ub + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + // ITU-R conversion, Now we are using ITU-R conversion + // Y = 0.299R + 0.587G + 0.114B + // U = -0.169R - 0.331G + 0.499B + 128 + // V = 0.499R - 0.418G - 0.0813B+ 128 + + // At the save module we have all 8 address sub-registers available. + // So we will use PING-PONG type of scheme to save the data using + // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help + // reduce dependency. - rT + + //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. + //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). + //Offsets are zero for buffer 0 and buffer 4. + //It always uses the YUVA layout. +//for BUFFER_0 + mov (4) a0.0<1>:uw r22.0<4;4,1>:uw + mov (4) a0.4<1>:uw r22.0<4;4,1>:uw + // YUV uses the a0.5,a0.6 and a0.4 as the indirect-register + // Y = a0.5, U=a0.6, V=a0.4 + // if channel swap? 
+ // This means that it should be BGRX(B is the LSB) or RGBX + // 1 means that it is BGRX. + and.nz.f0.0 null<1>:w r2.0<0;1,0>:uw 0x01:w + // pointer swap + (f0.0) mov (1) uwTemp0<1> a0.0:uw + (f0.0) mov (1) a0.0:uw a0.1:uw + (f0.0) mov (1) a0.1:uw uwTemp0<0;1,0> + +//the first line in the block 0 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 0]<1>:uw 0:uw + mov (16) r[a0.6, 0]<1>:uw 0:uw + mov (16) r[a0.4, 0]<1>:uw 0:uw + mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4> + + +//the second line in the block 0 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) 
acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 32]<1>:uw 0:uw + mov (16) r[a0.6, 32]<1>:uw 0:uw + mov (16) r[a0.4, 32]<1>:uw 0:uw + mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4> + +//the third line in the block 0 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 
64]<1>:uw 0:uw + mov (16) r[a0.6, 64]<1>:uw 0:uw + mov (16) r[a0.4, 64]<1>:uw 0:uw + mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4> + +//the fourth line in the block 0 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 96]<1>:uw 0:uw + mov (16) r[a0.6, 96]<1>:uw 0:uw + mov (16) r[a0.4, 96]<1>:uw 0:uw + mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4> + +//for Buffer_1 + + add (8) a0.0<1>:uw a0.0<8;8,1>:uw 512:uw +//the first line in the block 1 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 
0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 0]<1>:uw 0:uw + mov (16) r[a0.6, 0]<1>:uw 0:uw + mov (16) r[a0.4, 0]<1>:uw 0:uw + mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4> + + +//the second line in the block 1 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + 
mov (16) r[a0.5, 32]<1>:uw 0:uw + mov (16) r[a0.6, 32]<1>:uw 0:uw + mov (16) r[a0.4, 32]<1>:uw 0:uw + mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4> + +//the third line in the block 1 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 64]<1>:uw 0:uw + mov (16) r[a0.6, 64]<1>:uw 0:uw + mov (16) r[a0.4, 64]<1>:uw 0:uw + mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4> + +//the fourth line in the block 1 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f 
fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 96]<1>:uw 0:uw + mov (16) r[a0.6, 96]<1>:uw 0:uw + mov (16) r[a0.4, 96]<1>:uw 0:uw + mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4> + +//for Buffer_2 + add (8) a0.0<1>:uw a0.0<8;8,1>:uw 512:uw +//the first line in the block 2 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> 
fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 0]<1>:uw 0:uw + mov (16) r[a0.6, 0]<1>:uw 0:uw + mov (16) r[a0.4, 0]<1>:uw 0:uw + mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4> + +//the second line in the block 2 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 32]<1>:uw 0:uw + mov (16) r[a0.6, 32]<1>:uw 0:uw + mov (16) r[a0.4, 32]<1>:uw 0:uw + mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4> + +//the third line in the block 2 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac 
(16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 64]<1>:uw 0:uw + mov (16) r[a0.6, 64]<1>:uw 0:uw + mov (16) r[a0.4, 64]<1>:uw 0:uw + mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4> + +//the fourth line in the block 2 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) 
r[a0.5, 96]<1>:uw 0:uw + mov (16) r[a0.6, 96]<1>:uw 0:uw + mov (16) r[a0.4, 96]<1>:uw 0:uw + mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4> + +//for Buffer_3 + add (8) a0.0<1>:uw a0.0<8;8,1>:uw 512:uw +//the first line in the block 3 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 0]<1>:uw 0:uw + mov (16) r[a0.6, 0]<1>:uw 0:uw + mov (16) r[a0.4, 0]<1>:uw 0:uw + mov (16) r[a0.5,1]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,1]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,1]<2>:ub ubTempV(0, 0)<32;8,4> + + +//the second line in the block 3 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 
0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 32]<1>:uw 0:uw + mov (16) r[a0.6, 32]<1>:uw 0:uw + mov (16) r[a0.4, 32]<1>:uw 0:uw + mov (16) r[a0.5,33]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,33]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,33]<2>:ub ubTempV(0, 0)<32;8,4> + +//the third line in the block 3 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> 
+ + mov (16) r[a0.5, 64]<1>:uw 0:uw + mov (16) r[a0.6, 64]<1>:uw 0:uw + mov (16) r[a0.4, 64]<1>:uw 0:uw + mov (16) r[a0.5,65]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,65]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,65]<2>:ub ubTempV(0, 0)<32;8,4> + +//the fourth line in the block 3 + mov (8) fBUFFER_R(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_R(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_G(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_G(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_B(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_B(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + mul (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.299f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> 0.587f + mac (16) fBUFFER_Y(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.114f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> -0.169f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.331f + mac (16) fBUFFER_U(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.499f + + mov (16) acc0.0<1>:f 128.0f + mac (16) acc0.0<1>:f fBUFFER_R(0, 0)<8;8,1> 0.499f + mac (16) acc0.0<1>:f fBUFFER_G(0, 0)<8;8,1> -0.418f + mac (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> -0.0813f + + mov (16) wTempY(0,0)<2> fBUFFER_Y(0, 0)<8;8,1> + mov (16) wTempU(0,0)<2> fBUFFER_U(0, 0)<8;8,1> + mov (16) wTempV(0,0)<2> fBUFFER_V(0, 0)<8;8,1> + + mov (16) r[a0.5, 96]<1>:uw 0:uw + mov (16) r[a0.6, 96]<1>:uw 0:uw + mov (16) r[a0.4, 96]<1>:uw 0:uw + mov (16) r[a0.5,97]<2>:ub ubTempY(0, 0)<32;8,4> + mov (16) r[a0.6,97]<2>:ub ubTempU(0, 0)<32;8,4> + mov (16) r[a0.4,97]<2>:ub ubTempV(0, 0)<32;8,4> + diff --git a/src/shaders/post_processing/gen8/Save_AVS_NV12.g8a b/src/shaders/post_processing/gen8/Save_AVS_NV12.g8a new file mode 100644 index 0000000..dcb7ce0 --- /dev/null +++ b/src/shaders/post_processing/gen8/Save_AVS_NV12.g8a @@ -0,0 +1,621 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 131 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: Save_AVS_NV12.asm +// +// Save NV12 420 frame data block of size 16x16 +// +// To save 16x16 block (16x16 bytes of Y and 16x8 bytes of interleaved UV), we need 2 send instructions with of size 16x16 and 16x8 each. +// --------------- +// | 16x16 | +// | YUYV | +// --------------- +// | 16x8 UV | +// --------------- + +//----------------------------------------------------------------- +//The layout of data is as follows: +//mMSGHDR0 : Y data header (16x16) +//mubMSGPAYLOAD0 : Y data payload (8 GRFs) +//mMSGHDR1 : U data header (16x8) +//mubMSGPAYLOAD1 : U data payload (4 GRFs) +//------------------------------------------------------------------ + + + +// Module name: Save.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + +//Msg payload buffers; upto 4 full-size messages can be written + + +.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud + +.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw + +.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub + + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + + + // At the save module we have all 8 address sub-registers available. + // So we will use PING-PONG type of scheme to save the data using + // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help + // reduce dependency. - rT + + //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. + //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). + //Offsets are zero for buffer 0 and buffer 4. 
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw + + //Set up header for Y,U and V data + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + + mov (2) r28.0<1>:d r7.0<2;2,1>:w { NoDDClr } //ORI Y (LUMA) = ORI + mov (1) r37.0<1>:d r7.0<0;1,0>:w { NoDDClr } //H ORI (CHROMA) = H ORI + shr (1) r37.1<1>:d r7.1<0;1,0>:w 1:w { NoDDClr, NoDDChk } //V ORI (CHROMA) = V ORI/2 + + mov (1) r28.2<1>:ud 0xF000F:ud { NoDDChk } // Y Block width and height (16x16) + mov (1) r37.2<1>:ud 0x7000F:ud { NoDDChk } // UV Block width and height(16x8) + +// Unscramble, and pack data directly to MRFs + +// Data 16x16 block is divided as - +// --------- +// | 0 | +// --------- +// | 1 | +// --------- +// | 2 | +// --------- +// | 3 | +// --------- +// All sub-blocks are of size 16x4 +// 0: ubBUFFER_0 +// 1: ubBUFFER_1, ubBUFFER_0+16 +// 2: ubBUFFER_2 +// 3: ubBUFFER_3, ubBUFFER_2+16 + + //Y Rounding 16x4 top part + add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw + + // U Averaging and Rounding, 8x2 top part + shr (8) uwBUFFER_5(0,0)<2> r[a0.2,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(1,0)<2> r[a0.2,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(2,0)<2> r[a0.2,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(3,0)<2> r[a0.2,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2> + add.sat (8) r[a0.2,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2> + add.sat (8) r[a0.2,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw + + // V Averaging and Rounding, 8x2 top part + shr (8) uwBUFFER_5(4,0)<2> r[a0.0,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(5,0)<2> r[a0.0,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(6,0)<2> r[a0.0,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(7,0)<2> 
r[a0.0,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2> + add.sat (8) r[a0.0,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2> + add.sat (8) r[a0.0,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + + //Y Rounding, 16x4 bottom part + add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw + + // U Averaging and Rounding, 8x2 bottom part + shr (8) uwBUFFER_5(0,0)<2> r[a0.6,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(1,0)<2> r[a0.6,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(2,0)<2> r[a0.6,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(3,0)<2> r[a0.6,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2> + add.sat (8) r[a0.6,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2> + add.sat (8) r[a0.6,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw + + // V Averaging and Rounding, 8x2 bottom part + shr (8) uwBUFFER_5(4,0)<2> r[a0.4,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(5,0)<2> r[a0.4,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(6,0)<2> r[a0.4,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(7,0)<2> r[a0.4,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2> + add.sat (8) r[a0.4,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2> + add.sat (8) r[a0.4,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + //Y Rounding 16x4 top part + add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,32]<1>:uw 
r[a0.1,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw + + // U Averaging and Rounding, 8x2 top part + shr (8) uwBUFFER_5(0,0)<2> r[a0.2,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(1,0)<2> r[a0.2,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(2,0)<2> r[a0.2,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(3,0)<2> r[a0.2,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2> + add.sat (8) r[a0.2,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2> + add.sat (8) r[a0.2,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw + + // V Averaging and Rounding, 8x2 top part + shr (8) uwBUFFER_5(4,0)<2> r[a0.0,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(5,0)<2> r[a0.0,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(6,0)<2> r[a0.0,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(7,0)<2> r[a0.0,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2> + add.sat (8) r[a0.0,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2> + add.sat (8) r[a0.0,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + + //Y Rounding, 16x4 bottom part + add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw + + // U Averaging and Rounding, 8x2 bottom part + shr (8) uwBUFFER_5(0,0)<2> r[a0.6,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(1,0)<2> r[a0.6,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(2,0)<2> r[a0.6,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(3,0)<2> r[a0.6,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2> + add.sat (8) 
r[a0.6,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2> + add.sat (8) r[a0.6,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw + + // V Averaging and Rounding, 8x2 bottom part + shr (8) uwBUFFER_5(4,0)<2> r[a0.4,0]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(5,0)<2> r[a0.4,32]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(6,0)<2> r[a0.4,64]<16;8,2>:uw 1:w + shr (8) uwBUFFER_5(7,0)<2> r[a0.4,96]<16;8,2>:uw 1:w + + add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2> + add.sat (8) r[a0.4,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw + + add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2> + add.sat (8) r[a0.4,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + // restore pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4 registers + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw + +//Buffer 0 +//Move Y to msg payload + mov (16) mubMSGPAYLOAD0(0,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(0,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(1,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(1,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk } + +//Move U to msg payload + mov (8) mubMSGPAYLOAD1(0,0)<2> r[a0.2, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(0,16)<2> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + +//Move V to msg payload + mov (8) mubMSGPAYLOAD1(0,1)<2> r[a0.0, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(0,17)<2> r[a0.0, 65]<32;8,4>:ub { NoDDChk } + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + +//Buffer 1 + mov (16) mubMSGPAYLOAD0(2,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(2,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(3,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(3,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk 
} + + mov (8) mubMSGPAYLOAD1(1,0)<2> r[a0.6, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(1,16)<2> r[a0.6, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + mov (8) mubMSGPAYLOAD1(1,1)<2> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(1,17)<2> r[a0.4, 65]<32;8,4>:ub { NoDDChk } + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + +//Buffer 2 + mov (16) mubMSGPAYLOAD0(4,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(4,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(5,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(5,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(2,0)<2> r[a0.2, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(2,16)<2> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + mov (8) mubMSGPAYLOAD1(2,1)<2> r[a0.0, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(2,17)<2> r[a0.0, 65]<32;8,4>:ub { NoDDChk } + +//Buffer 3 + mov (16) mubMSGPAYLOAD0(6,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(6,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(7,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(7,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(3,0)<2> r[a0.6, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(3,16)<2> r[a0.6, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + mov (8) mubMSGPAYLOAD1(3,1)<2> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(3,17)<2> r[a0.4, 65]<32;8,4>:ub { NoDDChk } +//=========================================================================== + +send (1) null<1>:d r28 0xc 0x120A8018:ud +send (1) null<1>:d r37 0xc 0xA0A8019:ud diff --git a/src/shaders/post_processing/gen8/Save_AVS_PA.g8a b/src/shaders/post_processing/gen8/Save_AVS_PA.g8a new file mode 100644 index 0000000..1cedac7 --- /dev/null +++ b/src/shaders/post_processing/gen8/Save_AVS_PA.g8a @@ -0,0 +1,629 @@ +/* + * Copyright 2000-2013 Intel 
Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Authors: Zhao Yakui <yakui.zhao@intel.com> + */ +// 174 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: Save_AVS_PA.asm +// +// Save PA 422 frame data block of size 16x16 +// +// To save 16x16 block (32x16 bytes of YUYV) we need 2 send instructions with of size 16x16 each. +// ------------------------------- +// | 16x16 | 16x16 | +// | YUYV | YUYV | +// ------------------------------- +// these 2 sends are replaced by 8 32x2 sends to improve performance + + + +// Module name: Save.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + +//Msg payload buffers; up to 4 full-size messages can be written + + +.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud + +.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw + +.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub 
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub + + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + + + //wBUFF_CHNL_PTR points to buffer 0. + //Add appropriate offsets to get pointers for all buffers (1,2,3). + //Offset is zero for buffer 0. + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + + //Set DEST pointers according to output packing i.e. YUYV, YVYU, UYVY, VYUY + add (4) a0.4<1>:w r2.28<4;4,1>:ub 928:uw + + /* X block origin. YUY2 or UYVY */ + shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // H. block origin need to be 2 times + mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin (1st quadrant) + mov (1) r27.2<1>:ud 0x1001F:ud { NoDDChk } // Block width and height (32x2) + +// Rounding + // left + add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw + + // right + add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 16]<1>:uw 
r[a0.1, 16]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 512:uw + // left + add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw + + // right + add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 
0x0080:uw + add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw + // left + add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw + + // right + add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1536:uw + // left + add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 
0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw + + // right + add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw + + add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw + add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw + + add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw + add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 2048:uw + // restore pointer + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + add (1) r37.1<1>:d r27.1<0;1,0>:d 2:d // Point to 2nd part + + /* a0.2 U, a0.1 Y, a0.0 V */ + + mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk } + + /* a0.4 + 288 = r38 */ + mov (8) r[a0.6, 288]<4>:ub 
r[a0.0,65]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk } + + send (1) null<1>:d r28 0xc 0x60A8018:ud + send (1) null<1>:d r37 0xc 0x60A8018:ud + + // restore pointer + add (4) a0.0<1>:uw r22.0<4;4,1>:w 512:uw + + add (1) r28.1<1>:d r27.1<0;1,0>:d 4:d // Point to 2nd part + add (1) r37.1<1>:d r27.1<0;1,0>:d 6:d // Point to 2nd part + + + mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk } + + mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk } + + send (1) null<1>:d r28 0xc 0x60A8018:ud + send (1) null<1>:d r37 0xc 0x60A8018:ud + + // restore pointer + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw + + add (1) r28.1<1>:d r27.1<0;1,0>:d 8:d // Point to 2nd part + add (1) r37.1<1>:d r27.1<0;1,0>:d 10:d // Point to 2nd part + + + mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) 
r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk } + + mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk } + + send (1) null<1>:d r28 0xc 0x60A8018:ud + send (1) null<1>:d r37 0xc 0x60A8018:ud + + // restore pointer + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1536:uw + + add (1) r28.1<1>:d r27.1<0;1,0>:d 12:d // Point to 2nd part + add (1) r37.1<1>:d r27.1<0;1,0>:d 14:d // Point to 2nd part + + mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk } + + mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr } + mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr } + mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk } + mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk } + mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk } + + send (1) null<1>:d r28 0xc 0x60A8018:ud + send (1) null<1>:d r37 0xc 0x60A8018:ud + diff --git a/src/shaders/post_processing/gen8/Save_AVS_PL3.g8a b/src/shaders/post_processing/gen8/Save_AVS_PL3.g8a new file mode 100644 index 0000000..417fd4f --- /dev/null +++ b/src/shaders/post_processing/gen8/Save_AVS_PL3.g8a @@ -0,0 +1,565 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * Author: Zhao Yakui <yakui.zhao@intel.com> + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 84 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: Save_AVS_PL3.asm +// +// Save PL3 420 frame data block of size 16x16 +// +// To save 16x16 block (16x16 byte of Y and 8x8 byte of U and V each) we need 3 send instructions with one of size 16x16 and two of size 8x8. +// ----------------- +// | 16x16 Y | +// | | +// ----------------- +// | 8x8 U | +// --------- +// | 8x8 V | +// --------- + +//----------------------------------------------------------------- +//The layout of data is as follows: +//mMSGHDR0 : Y data header (16x16) +//mubMSGPAYLOAD0 : Y data payload (8 GRFs) +//mMSGHDR1 : U data header (8x8) +//mubMSGPAYLOAD1 : U data payload (2 GRFs) +//mMSGHDR2 : V data header (8x8) +//mubMSGPAYLOAD2 : V data payload (2 GRFs) +//------------------------------------------------------------------ + + + +// Module name: Save.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. 
+ + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. + +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> 
DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 
SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. 
+ + +//Msg payload buffers; up to 4 full-size messages can be written + + +.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud + +.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw + +.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub + + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + + + // At the save module we have all 8 address sub-registers available. + // So we will use PING-PONG type of scheme to save the data using + // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help + // reduce dependency. - rT + + //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. + //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). + //Offsets are zero for buffer 0 and buffer 4. 
+ add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw + + //Set up header for Y,U and V data + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + mov (8) r46<1>:ud r27<8;8,1>:ud + + mov (2) r28.0<1>:d r7.0<2;2,1>:w { NoDDClr } //ORI Y (LUMA) = ORI + shr (2) r37.0<1>:d r7.0<2;2,1>:w 1:w { NoDDClr } //H/V ORI U = H/V ORI/2 + shr (2) r46.0<1>:d r7.0<2;2,1>:w 1:w { NoDDClr } //H/V ORI V = H/V ORI/2 + + mov (1) r28.2<1>:ud 0xF000F:ud { NoDDChk } // Y Block width and height (16x16) + mov (1) r37.2<1>:ud 0x70007:ud { NoDDChk } // U Block width and height (8x8) + mov (1) r46.2<1>:ud 0x70007:ud { NoDDChk } // V Block width and height (8x8) + +// Unscramble, and pack data directly to MRFs + +// Data 16x16 block is divided as - +// --------- +// | 0 | +// --------- +// | 1 | +// --------- +// | 2 | +// --------- +// | 3 | +// --------- +// All sub-blocks are of size 16x4 +// 0: ubBUFFER_0 +// 1: ubBUFFER_1, ubBUFFER_0+16 +// 2: ubBUFFER_2 +// 3: ubBUFFER_3, ubBUFFER_2+16 + + //Y Rounding, first + add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw + + // U rounding + add.sat (8) r[a0.2,0]<2>:uw r[a0.2,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.2,64]<2>:uw r[a0.2,64]<16;8,2>:uw 0x0080:uw + + // V rounding + add.sat (8) r[a0.0,0]<2>:uw r[a0.0,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.0,64]<2>:uw r[a0.0,64]<16;8,2>:uw 0x0080:uw + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + + //Y Rounding, second + add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw + + // U rounding + add.sat (8) r[a0.6,0]<2>:uw 
r[a0.6,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.6,64]<2>:uw r[a0.6,64]<16;8,2>:uw 0x0080:uw + + // V rounding + add.sat (8) r[a0.4,0]<2>:uw r[a0.4,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.4,64]<2>:uw r[a0.4,64]<16;8,2>:uw 0x0080:uw + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + + //Y Rounding, third + add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw + + // U rounding + add.sat (8) r[a0.2,0]<2>:uw r[a0.2,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.2,64]<2>:uw r[a0.2,64]<16;8,2>:uw 0x0080:uw + + // V rounding + add.sat (8) r[a0.0,0]<2>:uw r[a0.0,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.0,64]<2>:uw r[a0.0,64]<16;8,2>:uw 0x0080:uw + + + //Y Rounding, fourth + add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw + add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw + + // U rounding + add.sat (8) r[a0.6,0]<2>:uw r[a0.6,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.6,64]<2>:uw r[a0.6,64]<16;8,2>:uw 0x0080:uw + + // V rounding + add.sat (8) r[a0.4,0]<2>:uw r[a0.4,0]<16;8,2>:uw 0x0080:uw + add.sat (8) r[a0.4,64]<2>:uw r[a0.4,64]<16;8,2>:uw 0x0080:uw + + // restore the TOP and BOT pointers + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + add (4) a0.4<1>:uw r22.0<4;4,1>:w 512:uw + +//Buffer 0 +//Move Y to msg payload + mov (16) mubMSGPAYLOAD0(0,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(0,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(1,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(1,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk } + +//Move U to msg payload + mov (8) mubMSGPAYLOAD1(0,0)<1> r[a0.2, 1]<32;8,4>:ub { NoDDClr } + mov (8) 
mubMSGPAYLOAD1(0,8)<1> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + +//Move V to msg payload + mov (8) mubMSGPAYLOAD2(0,0)<1> r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD2(0,8)<1> r[a0.0, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + add (4) a0.0<1>:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers + +//Buffer 1 + mov (16) mubMSGPAYLOAD0(2,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(2,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(3,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(3,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(0,16)<1> r[a0.6, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(0,24)<1> r[a0.6, 65]<32;8,4>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD2(0,16)<1> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD2(0,24)<1> r[a0.4, 65]<32;8,4>:ub { NoDDChk } + + add (4) a0.4<1>:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers + +//Buffer 2 + mov (16) mubMSGPAYLOAD0(4,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(4,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(5,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(5,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(1,0)<1> r[a0.2, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD1(1,8)<1> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + + mov (8) mubMSGPAYLOAD2(1,0)<1> r[a0.0, 1]<32;8,4>:ub { NoDDClr } + mov (8) mubMSGPAYLOAD2(1,8)<1> r[a0.0, 65]<32;8,4>:ub { NoDDClr, NoDDChk } + +//Buffer 3 + mov (16) mubMSGPAYLOAD0(6,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(6,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk } + mov (16) mubMSGPAYLOAD0(7,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr } + mov (16) mubMSGPAYLOAD0(7,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk } + + mov (8) mubMSGPAYLOAD1(1,16)<1> r[a0.6, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD1(1,24)<1> r[a0.6, 65]<32;8,4>:ub { 
NoDDChk } + + mov (8) mubMSGPAYLOAD2(1,16)<1> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } + mov (8) mubMSGPAYLOAD2(1,24)<1> r[a0.4, 65]<32;8,4>:ub { NoDDChk } + +//=========================================================================== + +send (1) null<1>:d r28 0xc 0x120A8018:ud +send (1) null<1>:d r37 0xc 0x60A8019:ud +send (1) null<1>:d r46 0xc 0x60A801A:ud diff --git a/src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a b/src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a new file mode 100644 index 0000000..d2df8e4 --- /dev/null +++ b/src/shaders/post_processing/gen8/Save_AVS_RGBX.g8a @@ -0,0 +1,641 @@ +/* + * Copyright 2000-2013 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + */ + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usage + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: Save_AVS_RGBX.asm +// +// Save packed ARGB 444 frame data block of size 16x16 +// +// To save a 16x16 block (64x16 byte layout for ARGB8888) we need 8 send instructions, each writing a 32x4 byte block +// --------- +// | 0 | 1 | +// | 2 | 3 | +// | 4 | 5 | +// | 6 | 7 | +// --------- +// All eight 32x4 block sends are used + + + +// Module name: Save.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFs) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + +//Msg payload buffers; up to 4 full-size messages can be written + + +.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud + +.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw + +.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub 
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub + + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + + +// At the save module we have all 8 address sub-registers available. +// So we will use PING-PONG type of scheme to save the data using +// pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help +// reduce dependency. - rT + + //Internal LAYOUT:(RRGGBBAA) + //Assign buffer channel order for Buffer 0123 in the order RGBA a0.3>A, a0.2>B, a0.1>G, a0.0>R + // R = 0, G= 4, B = 8, A = 12. + mov (4) acc0.0<1>:w 0x62EA:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw + + // if channel swap? + // This means that it should be BGRA(B is the LSB) or RGBA + // the internal format is always RGBA(MSB-A-B-G-R). + and.nz.f0.0 null<1>:w r2.3<0;1,0>:uw 0x01:w + +//wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. +//Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). +//Offsets are zero for buffer 0 and buffer 4. + add (4) a0.0<1>:uw r22.0<4;4,1>:w 0:uw + + // pointer swap + (f0.0) mov (1) uwTemp0<1> a0.0<0;1,0>:uw + (f0.0) mov (1) a0.0<1>:uw a0.2<0;1,0>:uw + (f0.0) mov (1) a0.2<1>:uw uwTemp0<0;1,0> + + shl (1) r27.0<1>:d r7.0<0;1,0>:w 2:w { NoDDClr } // H. block origin need to be quadrupled + mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin (1st quadrant) + mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block width and height (32x4) + + mov (4) a0.4<1>:uw a0.0<4;4,1>:uw + + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + mov (8) r46<1>:ud r27<8;8,1>:ud + mov (8) r55<1>:ud r27<8;8,1>:ud + + mov (8) r31<1>:ud r27<8;8,1>:ud + mov (8) r40<1>:ud r27<8;8,1>:ud + mov (8) r49<1>:ud r27<8;8,1>:ud + mov (8) r58<1>:ud r27<8;8,1>:ud + +//Buffer 0/1 are written by using 4 32x4. 
+ + add (1) r37.0<1>:d r27.0<0;1,0>:d 32:d + + add (1) r46.1<1>:d r27.1<0;1,0>:d 4:d + + add (1) r55.1<1>:d r27.1<0;1,0>:d 4:d + add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d + + // write Buf_0 to 1st quarter of four horizontal output blocks + +// Please note the scattered order of NODDCLR, NODDCHK flags. Since the sub-registers +// of destination reg are not updated at one place and hence even flags are scattered. -rT + +/* for block 0 the left part of buffer 0 and 1 */ + mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.0, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.2, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.0, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.2, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.0, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.1, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.2, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.0, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.1, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.2, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(2, 0)<4> r[a0.0, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 1)<4> r[a0.1, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 2)<4> r[a0.2, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(3, 0)<4> r[a0.0, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 1)<4> r[a0.1, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 2)<4> r[a0.2, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(2, 0)<4> r[a0.0, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 1)<4> r[a0.1, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 2)<4> r[a0.2, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 3)<4> r2.31:ub + + mov (8) 
mubMSGPAYLOAD1(3, 0)<4> r[a0.0, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 1)<4> r[a0.1, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 2)<4> r[a0.2, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 3)<4> r2.31:ub + +/* For Buffer 0 */ + send (16) null<1>:d r28 0xc 0x0A0A8018:ud + send (16) null<1>:d r37 0xc 0x0A0A8018:ud + + add (4) a0.0<1>:uw a0.4<4;4,1>:uw 512:uw + mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.0, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.2, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.0, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.2, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.0, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.1, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.2, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.0, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.1, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.2, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(2, 0)<4> r[a0.0, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 1)<4> r[a0.1, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 2)<4> r[a0.2, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(3, 0)<4> r[a0.0, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 1)<4> r[a0.1, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 2)<4> r[a0.2, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(2, 0)<4> r[a0.0, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 1)<4> r[a0.1, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 2)<4> r[a0.2, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(3, 0)<4> r[a0.0, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 1)<4> r[a0.1, 113]<16;8,2>:ub + mov (8) 
mubMSGPAYLOAD3(3, 2)<4> r[a0.2, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 3)<4> r2.31:ub + // send Buffer 1 + send (16) null<1>:d r46 0xc 0x0A0A8018:ud + send (16) null<1>:d r55 0xc 0x0A0A8018:ud + + +/* for Buffer 2/3 */ + mov (8) r28<1>:ud r27<8;8,1>:ud + mov (8) r37<1>:ud r27<8;8,1>:ud + mov (8) r46<1>:ud r27<8;8,1>:ud + mov (8) r55<1>:ud r27<8;8,1>:ud + + add (1) r28.1<1>:d r27.1<0;1,0>:d 8:d + + add (1) r37.0<1>:d r27.0<0;1,0>:d 32:d + add (1) r37.1<1>:d r27.1<0;1,0>:d 8:d + + add (1) r46.1<1>:d r27.1<0;1,0>:d 12:d + + add (1) r55.1<1>:d r27.1<0;1,0>:d 12:d + add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d + + add (4) a0.0<1>:uw a0.4<4;4,1>:uw 1024:uw + + mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.0, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.2, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.0, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.2, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.0, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.1, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.2, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.0, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.1, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.2, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(2, 0)<4> r[a0.0, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 1)<4> r[a0.1, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 2)<4> r[a0.2, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD0(3, 0)<4> r[a0.0, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 1)<4> r[a0.1, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 2)<4> r[a0.2, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD0(3, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(2, 0)<4> r[a0.0, 
81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 1)<4> r[a0.1, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 2)<4> r[a0.2, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD1(3, 0)<4> r[a0.0, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 1)<4> r[a0.1, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 2)<4> r[a0.2, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD1(3, 3)<4> r2.31:ub + +// Send Buffer 2 + send (16) null<1>:d r28 0xc 0x0A0A8018:ud + send (16) null<1>:d r37 0xc 0x0A0A8018:ud + + add (4) a0.0<1>:uw a0.4<4;4,1>:uw 1536:uw + mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.0, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.2, 1]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.0, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.2, 33]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.0, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.1, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.2, 17]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.0, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.1, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.2, 49]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(2, 0)<4> r[a0.0, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 1)<4> r[a0.1, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 2)<4> r[a0.2, 65]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD2(3, 0)<4> r[a0.0, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 1)<4> r[a0.1, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 2)<4> r[a0.2, 97]<16;8,2>:ub + mov (8) mubMSGPAYLOAD2(3, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(2, 0)<4> r[a0.0, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 1)<4> r[a0.1, 81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 2)<4> r[a0.2, 
81]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(2, 3)<4> r2.31:ub + + mov (8) mubMSGPAYLOAD3(3, 0)<4> r[a0.0, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 1)<4> r[a0.1, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 2)<4> r[a0.2, 113]<16;8,2>:ub + mov (8) mubMSGPAYLOAD3(3, 3)<4> r2.31:ub + // send buffer 3 + send (16) null<1>:d r46 0xc 0x0A0A8018:ud + send (16) null<1>:d r55 0xc 0x0A0A8018:ud + + + diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a new file mode 100644 index 0000000..798564f --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_BGRA.g8a @@ -0,0 +1,368 @@ +/* + * Copyright 2000-2013 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Authors: Zhao Yakui <yakui.zhao@intel.com> + */ +// 7 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + 
+//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//Module Name: Set_AVS_Buf_0123_BGRA.asm + + + +//Module Name: Set_Buf_0123_BGRA + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + //AVS LAYOUT:(UUYYVVAA) + //AVS RGBX LAYOUT (RRGGBBAA) + //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V + // V = 8, Y= 0, U = 4, A = 12. + // And a0.x is used as indirect-register for RGBX. R=a0.1, G=a0.2, B=a0.0 + // B = 8, R= 0, G = 4, A = 12 + mov (4) acc0.0<1>:w 0x6EA2:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw + + //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT + + //SU LAYOUT:(VYUAVYUA) + //V = 4, Y = 2, U = 0, A = 6 + //B = 4, G = 2, R = 0, A = 6 + mov (4) acc0.0<1>:w 0x6204:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw + shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. 
+ + //OFFSET: + mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } + + diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a new file mode 100644 index 0000000..1d38ae2 --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL2.g8a @@ -0,0 +1,361 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 7 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//Module Name: Set_AVS_Buf_0123_PL2.asm + + + +//Module Name: Set_Buf_0123_PL2 + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + //AVS LAYOUT: (YYUUVVAA) + //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V + //For PL2-AVS: V = 8, Y= 0, U = 4, A = 12. + mov (4) acc0.0<1>:w 0x6EA2:v //Subtract 6 from 0,4,8,12 + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //add 6 back, plus 64 (BUFFER_0 base GRF r64) + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address. + + //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT + //SU LAYOUT:(YUVAYUVA) + //V = 4, Y = 0, U = 2, A = 6 + mov (4) acc0.0<1>:w 0x6204:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw + shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. + + //OFFSET: + mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } + + diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a new file mode 100644 index 0000000..0533666 --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_PL3.g8a @@ -0,0 +1,362 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 7 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//Module Name: Set_AVS_Buf_0123_PL3.asm + + + +//Module Name: Set_Buf_0123_PL3 + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + //AVS LAYOUT: (YYUUVVAA) + //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V + //For PL3-AVS: V = 8, Y= 0, U = 4, A = 12. + mov (4) acc0.0<1>:w 0x6EA2:v + //Subtract 6 from 0,4,8,12 + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //add 6 back, plus 64 (BUFFER_0 base GRF r64) + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address. + + //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT + //SU LAYOUT:(YUVAYUVA) + //V = 4, Y = 0, U = 2, A = 6 + mov (4) acc0.0<1>:w 0x6204:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw + shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. + + //OFFSET: + mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } + + diff --git a/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a new file mode 100644 index 0000000..3573e2b --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_AVS_Buf_0123_VYUA.g8a @@ -0,0 +1,366 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 7 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +//Module Name: Set_AVS_Buf_0123_VYUA.asm + + + +//Module Name: Set_Buf_0123_VYUA + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Channel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + + //AVS LAYOUT:(VVYYUUAA) + //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V + // V = 0, Y= 4, U = 8, A = 12. + //YCrCb or YCrCb_Swap returns the following data: + //Cr is returned on R-channel. 0 + //Y is returned on G channel. 4 + //Cb is returned on B channel. 8 + mov (4) acc0.0<1>:w 0x62EA:v //Subtract 6 from 0,4,8,12 + add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //add 6 back, plus 64 (BUFFER_0 base GRF r64) + shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address. + + //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT + + //SU LAYOUT:(VYUAVYUA) + //V = 0, Y = 2, U = 4, A = 6 + mov (4) acc0.0<1>:w 0x6420:v + add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw + shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. 
+ + //OFFSET: + mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } + + diff --git a/src/shaders/post_processing/gen8/Set_Layer_0.g8a b/src/shaders/post_processing/gen8/Set_Layer_0.g8a new file mode 100644 index 0000000..b1b574e --- /dev/null +++ b/src/shaders/post_processing/gen8/Set_Layer_0.g8a @@ -0,0 +1,483 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// 18 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + +#define MSG_AVS_SAMPLE 0x00000000 +#define MSG_CONVOLE_SAMPLE 0x10000000 +#define MSG_MINMAX_SAMPLE 0x20000000 +#define MSG_MINMAXF_SAMPLE 0x30000000 +#define MSG_ERODE_SAMPLE 0x40000000 +#define MSG_DILATE_SAMPLE 0x50000000 +#define MSG_BOOLCENT_SAMPLE 0x60000000 +#define MSG_CENTROID_SAMPLE 0x70000000 + +#define MSG_IEF_BYPASS 0x08000000 +#define MSG_IEF_ENABLE 0x00000000 + +//16x4 or 8x4 or 16x8 or 4x4 +#define MSG_AVS_164 0x00000000 +#define MSG_AVS_84 0x02000000 +#define 
MSG_AVS_168 0x04000000 +#define MSG_AVS_44 0x06000000 + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + + + + + +//Module name: Set_Layer_N.inc + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + +//Used to generate LABELS at compile time. + + +//definitions for Expand Mask +.declare uwMask_Temp1 Base=r17.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF +.declare ubMask_Temp1 Base=r17.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF +.declare udMask_Temp1 Base=r17.0 ElementSize=4 Type=ud // 1 GRF +.declare uwMask_Temp2 Base=r16.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF +.declare ubMask_Temp2 Base=r16.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF +.declare udMask_Temp2 Base=r16.0 ElementSize=4 Type=ud // 1 GRF + +.declare uwMask_Temp3 Base=r15.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF +.declare ubMask_Temp3 Base=r15.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF + +.declare udALPHA_MASK_REG Base=r21.0 ElementSize=4 Type=ud // 1 GRF +.declare udALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=4 Type=ud // 1 GRF + + +//Initialize mask reg to FFFF + + mov (16) uwALPHA_MASK_REG(0)<1> 0xFFFF:uw + + +//Fast jump for - +//LAYER0: we determine whether layer 0 is to be loaded and processed or not based +// on block mask in module "Set_Layer_0" and store result in f0.1. +// This flag is then directly used to while loading buf0-3 and colorfill. 
+// (So flag f0.1 should not be changed from Set_Layer_0 till Colorfill) +// +//LAYER1-7: For all other layers, we compute whether layer is to be loaded and processed +// based on block mask in module "Set_Layer_1-7" and store result in SKIP_LAYER +// variable. +// While Loading buf 4 and 5, we move SKIP_LAYER to f0.0 every time and use it +// for Loading. +// For processing though, we move SKIP_LAYER only once to f0.1 in module +// "Set_Buf0_Buf4" and use f0.1 for deciding whether layer 1-7 (all 4 sub blocks) +// is to be processed or not. +// (So flag f0.1 should not be modified from module "Set_Buf0_Buf4" till module +// that processes sub-block 3). +// +//None of the above fast jumps apply to CSC modules. We always perform CSC irrespective of mask. +// +//Example: (Without going into finer details) +// Typical Combined kernel: +// +// (let var = decision whether to load/process that layer) +// +// Set_Layer_0 //f0.1 <- var +// .. +// Set_Layer_1 //f0.1 <- var, SKIP_LAYER <- var +// .. +// Load buf 0 //use f0.1 +// Load buf 4 //f0.0 <- SKIP_LAYER +// Load buf 1 //use f0.1 +// Load buf 5 //f0.0 <- SKIP_LAYER +// Load buf 2 //use f0.1 +// Load buf 3 //use f0.1 +// .. +// .. +// Colorfill +// .. +// Set_Buf0_Buf4 //f0.1 <- SKIP_LAYER +// process0-4 //Use f0.1 +// Load buf 4 +// Set_Buf1_Buf5 +// process1-5 +// Load buf 5 +// .. +// Set_Layer_2 //f0.1 <-var, SKIP_LAYER <- var +// .. +// Set_Buf2_Buf4 +// process2-4 +// Load buf 4 +// Set_Buf3_Buf5 +// process3-5 +// Load buf 5 +// .. 
+ + + and (1) r24.2<1>:ub r2.2<0;1,0>:uw 3:uw + + + //Copy all AVS Payload data + // Setup Message Payload Header for 1st block of Media Sampler 8x8 (16x4 for IVB+) + //currently the dx & dy is passed by Constant buffer (zero) + mov (1) r25.0<1>:f r7.6<0;1,0>:f //NLAS dy + mov (1) r25.6<1>:f r7.5<0;1,0>:f //NLAS dx + mov (1) r25.4<1>:f r3.0<0;1,0>:f //Step X + mov (1) r25.5<1>:f r4.0<0;1,0>:f //Step Y + + + mov (1) r25.2<1>:f r6.0<0;1,0>:f //Orig X + mov (1) r25.3<1>:f r5.0<0;1,0>:f //Orig Y + + mov (1) r25.7<1>:ud 0:ud + add (1) r25.7<1>:ud r25.7<0;1,0>:ud MSG_AVS_SAMPLE + MSG_AVS_164 + MSG_IEF_BYPASS:ud + + //NLAS calculations for 2nd half of blocks of Media Sampler 8x8: + // X(i) = X0 + dx*i + ddx*i*(i-1)/2 ==> X(8) = X0 + dx*8 +ddx*28 + // dx(i)= dx(0) + ddx*i ==> dx(8)= dx + ddx*8 + + //OPTIMIZATION: fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY - are sub registers of same GRF. Use NODDCLR NODDCHK. -rT + + // Calculating X(8) + mov (1) acc0.2<1>:f r6.0<0;1,0>:f + mac (1) acc0.2<1>:f r3.0<0;1,0>:f 8.0:f + mac (1) r23.2<1>:f r7.5<0;1,0>:f 28.0:f { NoDDClr } + + // Calculating Y(4) + mul (1) r23.1<1>:f r4.0<0;1,0>:f 4.0:f { NoDDClr, NoDDChk } //dY*4 + + // Calculating dx(8) + mov (1) acc0.4<1>:f r3.0<0;1,0>:f + mac (1) r23.4<1>:f r7.5<0;1,0>:f 8.0:f { NoDDClr, NoDDChk } + + // Binding Index + mov (1) r23.5<1>:ud 0:ud { NoDDChk } + + +SKIP_LAYER_L0: + nop + + diff --git a/src/shaders/post_processing/gen8/VP_Setup.g8a b/src/shaders/post_processing/gen8/VP_Setup.g8a new file mode 100644 index 0000000..95f5fe2 --- /dev/null +++ b/src/shaders/post_processing/gen8/VP_Setup.g8a @@ -0,0 +1,440 @@ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Copyright 2000-2011 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Authors: Zhao Yakui <yakui.zhao@intel.com> + */ + +// 326 // Total instruction count +// 1 // Total kernel count + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// FileName: VP_Setup.asm +// Author: Vivek Kumar +// Description: Sets up all parameters for the Video Processing Kernel + + + + +// Description: Includes all definitions explicit to Fast Composite. + + + + +// End of common.inc + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud +.declare udBUFFER_5 Base=r46.0 
ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw +.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub +.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + +//Setup pointer to the inline parameter + +// Copy MSG HDR + mov (8) r27.0<1>:ud r0.0<8;8,1>:ud // Initialize message payload header with R0 + +// Only one layer is enough + +//temp; remove it once unread msg warnings are resolved -vK +mov (8) r25<1>:ud r0.0<8;8,1>:ud +mov (8) r26<1>:ud r0.0<8;8,1>:ud + +// Calculate StepX for all layers and overwrite it on the ratio + mul (8) r3.0<1>:f r3.0<8;8,1>:f r7.4<0;1,0>:f //StepX_ratio = StepX / VideoStepX + + //Normalised Ratio of Horizontal step size with main video for all layers now becomes + //Normalised Horizontal step size for all layers + +// Calculate block origin for all layers and overwrite it on the frame origin + mov (2) r8.5<1>:f r7.0<2;2,1>:w //Convert origin from word to float + + cmp.e.f0.0 (1) null<1>:d r2.26<0;1,0>:ub 1:uw + + + shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 0:uw + and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw + cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw + (f0.1) jmpi (1) ROTATE_90_L0 + cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw + (f0.1) jmpi (1) ROTATE_180_L0 + cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw + (f0.1) jmpi (1) ROTATE_270_L0 + + // rotate 0 degree +ROTATE_0_L0: + (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f + (-f0.0)mac (1) r6.0<1>:f 
r3.0<0;1,0>:f r8.5<0;1,0>:f + + mov (1) acc0.0<1>:f r5.0<0;1,0>:f + mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.6<0;1,0>:f + jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0 + + // rotate 90 degree +ROTATE_90_L0: + (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f + (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r8.6<0;1,0>:f + + mov (1) r16.0<1>:f r2.0<0;1,0>:uw + add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f + add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f + + mov (1) acc0.0<1>:f r5.0<0;1,0>:f + mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f + jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0 + + // rotate 180 degree +ROTATE_180_L0: + (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw + (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f + (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f + (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f + (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f + + mov (1) r16.0<1>:f r2.1<0;1,0>:uw + add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f + add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f + mov (1) acc0.0<1>:f r5.0<0;1,0>:f + mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f + jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0 + + // rotate 270 degree +ROTATE_270_L0: + (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw + (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f + (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f + (-f0.0)mov (1) acc0.0<1>:f r6.0<0;1,0>:f + (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f + + mov (1) acc0.0<1>:f r5.0<0;1,0>:f + mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.5<0;1,0>:f + +END_SRC_BLOCK_ORIG_COMP_L0: + nop diff --git a/src/shaders/post_processing/gen8/YUV_to_RGB.g8a b/src/shaders/post_processing/gen8/YUV_to_RGB.g8a new file mode 100644 index 0000000..2b968d8 --- /dev/null +++ b/src/shaders/post_processing/gen8/YUV_to_RGB.g8a @@ -0,0 +1,971 @@ +/* + * Copyright 2000-2013 Intel Corporation All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + */ + + + +// Module name: common.inc +// +// Common header file for all Video-Processing kernels +// + +.default_execution_size (16) +.default_register_type :ub + +.reg_count_total 128 +.reg_count_payload 7 + +//========== Common constants ========== + + +//========== Macros ========== + + +//Fast Jump, For more details see "Set_Layer_N.asm" + + +//========== Defines ==================== + +//========== Static Parameters (Common To All) ========== +//r1 + + +//r2 + + // e.g. byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + +//Color Pipe (IECP) parameters + + +//ByteCopy + + +//r4 + + // e.g. 
byte0 byte1 byte2 + // YUYV 0 1 3 + // YVYU 0 3 1 + + +//========== Inline parameters (Common To All) =========== + + +//============== Binding Index Table=========== +//Common between DNDI and DNUV + + +//================= Common Message Descriptor ===== +// Message descriptor for thread spawning +// Message Descriptors +// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) +// 0000,0000,0000 +// 0001(Spawn a root thread),0001 (Root thread spawn thread) +// = 0x02000011 +// Thread Spawner Message Descriptor + + +// Message descriptor for atomic operation add +// Message Descriptors +// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) +// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) +// 0000,0000 (Binding table index, added later) +// = 0x02000011 + +// Atomic Operation Add Message Descriptor + + +// Message descriptor for dataport media write + // Message Descriptors + // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) + // 1 (header present 1) 0 1010 (media block write) 000000 + // 00000000 (binding table index - set later) + // = 0x020A8000 + + +// Message Length defines + + +// Response Length defines + + +// Block Width and Height Size defines + + +// Extended Message Descriptors + + +// Common message descriptors: + + +//===================== Math Function Control =================================== + + +//============ Message Registers =============== + // buf4 starts from r28 + + +//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT + + +.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub +.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw +.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud +.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f + +//=================== End of thread instruction =========================== + + +//=====================Pointers 
Used===================================== + + +//======================================================================= + + +//r9-r17 +// Define temp space for any usages + + +// Common Buffers + + +// temp space for rotation + +.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud + +.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw + +.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub + + +// End of common.inc + + +// Module name: YUV_to_RGB.asm +// +// Convert YUV to RGB, handle it by 16x4 block +// + + +// Description: Includes all definitions explicit to Fast Composite. + + +//========== GRF partition ========== + // r0 header : r0 (1 GRF) + // Static parameters : r1 - r6 (6 GRFS) + // Inline parameters : r7 - r8 (2 GRFs) + // MSGSRC : r27 (1 GRF) +//=================================== + +//Interface: +//========== Static Parameters (Explicit To Fast Composite) ========== +//r1 +//CSC Set 0 + + +.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud + +//Constant alpha + + +//r2 + + +// Gen7 AVS WA + + +// WiDi Definitions + + +//Colorfill + + + // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. 
+ +.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub + +//r3 +//Normalised Ratio of Horizontal step size with main video for all layers + + + //Normalised Ratio of Horizontal step size with main video for all layers becomes + //Normalised Horizontal step size for all layers in VP_Setup.asm + + +//r4 +//Normalised Vertical step size for all layers + + +//r5 +//Normalised Vertical Frame Origin for all layers + + +//r6 +//Normalised Horizontal Frame Origin for all layers + + +//========== Inline Parameters (Explicit To Fast Composite) ========== + + +//Main video Step X + + +//====================== Binding table (Explicit To Fast Composite)========================================= + + +//Used by Interlaced Scaling Kernels + + +//========== Sampler State Table Index (Explicit To Fast Composite)========== +//Sampler Index for AVS/IEF messages + + +//Sampler Index for SIMD16 sampler messages + + +//============================================================================= + +.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f +.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f + +.declare bBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub +.declare bBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub + +//Pointer to mask reg + + +//r18 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. 
-rT + + +.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF + +//r19 + + +.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF + + +//r20 + +.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r21 + +.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF + +//r22 + + +//Always keep Cannel Pointers and Offsets in same GRF, so that we can use +// NODDCLR, NODDCHK flags. -rT + + +//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as +//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT + +//r23 + + +//Lumakey + + +//r24 + + +//r25 + + +//r26 + + +//defines to generate LABELS during compile time. + + +//Msg payload buffers; upto 4 full-size messages can be written + +//Unnecessary to use the MSGPayLoad, So it is temporiarily used for conversion of YUV->RGB + +.declare fBUFFER_R Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_G Base=r30.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_B Base=r32.0 ElementSize=4 SrcRegion=<8;8,1> Type=f + +.declare fBUFFER_Y Base=r36.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_U Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=f +.declare fBUFFER_V Base=r40.0 ElementSize=4 SrcRegion=<8;8,1> Type=f + + +.declare wTempR Base=r42.0 ElementSize=2 Type=w +.declare wTempG Base=r44.0 ElementSize=2 Type=w +.declare wTempB Base=r46.0 ElementSize=2 Type=w + +.declare ubTempR Base=r42.0 ElementSize=1 Type=ub +.declare ubTempG Base=r44.0 ElementSize=1 Type=ub +.declare ubTempB Base=r46.0 ElementSize=1 Type=ub + + // the r17 register (nTEMP0) is originally defined from "Common.inc" + // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming + + .declare wTemp0 Base=r17.0 ElementSize=2 Type=uw + + +//_SAVE_INC_ + // NTSC standard + // R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255)) + // G = Clamp ( 1.164(Y-16/255) - 
0.813(Cr-128/255) - 0.392(Cb-128/255)) + // B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255)) + // ITU-R conversion, Now we are using ITU-R conversion + // R = clip( Y + 1.402*(Cr-128)) // ITU-R + // G = clip( Y - 0.344*(Cb-128) - 0.714*(Cr-128)) + // B = clip( Y + 1.772*(Cb-128)) + + // At the save module we have all 8 address sub-registers available. + // So we will use PING-PONG type of scheme to save the data using + // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help + // reduce dependency. - rT + + //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. + //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). + //Offsets are zero for buffer 0 and buffer 4. + //Y/U/V is also stored as R/G/B for the internal purpose +//for BUFFER_0 + mov (4) a0.0<1>:uw r22.0<4;4,1>:uw +//the first line in the block 0 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) 
fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + + mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> + +//the second line in the block 0 + + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,33]<2>:ub ubTempB(0, 
0)<32;8,4> + +//the third line in the block 0 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> + +//the fourth line in the block 0 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 
0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> + + +//for BUFFER_1 + add (4) a0.0<1>:uw r22.0<4;4,1>:uw 512:uw +//the first line in the block 1 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac 
(16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + + mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> + +//the second line in the block 1 + + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> 
fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> + +//the third line in the block 1 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> + 
mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> + +//the fourth line in the block 1 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> + + +//for BUFFER_2 + add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1024:uw +//the first line in the block 2 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub 
+ mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + + mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> + +//the second line in the block 2 + + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 49]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) 
acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> + +//the third line in the block 2 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 
0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> + +//the fourth line in the block 2 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,97]<2>:ub ubTempR(0, 
0)<32;8,4> + mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> + + +//for BUFFER_3 + add (4) a0.0<1>:uw r22.0<4;4,1>:uw 1536:uw +//the first line in the block 3 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 1]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 17]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 1]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 17]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 1]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 17]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + + mov (16) r[a0.1,1]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,1]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,1]<2>:ub ubTempB(0, 0)<32;8,4> + +//the second line in the block 3 + + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 33]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 49]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 33]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 
49]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 33]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 49]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,33]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,33]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,33]<2>:ub ubTempB(0, 0)<32;8,4> + +//the third line in the block 3 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 65]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 81]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 65]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 81]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 65]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 81]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac 
(8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,65]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,65]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,65]<2>:ub ubTempB(0, 0)<32;8,4> + +//the fourth line in the block 3 + mov (8) fBUFFER_Y(0, 0)<1> r[a0.1, 97]<16;8,2>:ub + mov (8) fBUFFER_Y(1, 0)<1> r[a0.1, 113]<16;8,2>:ub + mov (8) fBUFFER_U(0, 0)<1> r[a0.2, 97]<16;8,2>:ub + mov (8) fBUFFER_U(1, 0)<1> r[a0.2, 113]<16;8,2>:ub + mov (8) fBUFFER_V(0, 0)<1> r[a0.0, 97]<16;8,2>:ub + mov (8) fBUFFER_V(1, 0)<1> r[a0.0, 113]<16;8,2>:ub + + add (16) fBUFFER_U(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> -128.0f + add (16) fBUFFER_V(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -128.0f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_R(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 1.402f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (8) acc0.0<1>:f fBUFFER_U(0, 0)<8;8,1> -0.344f + mac (8) acc1.0<1>:f fBUFFER_U(1, 0)<8;8,1> -0.344f + mac (16) fBUFFER_G(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> -0.714f + + mov (16) acc0.0<1>:f fBUFFER_Y(0, 0)<8;8,1> + mac (16) fBUFFER_B(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 1.772f + + mul.sat (16) fBUFFER_Y(0, 0)<1> fBUFFER_R(0, 0)<8;8,1> 0.0039216f + mul.sat (16) fBUFFER_U(0, 0)<1> fBUFFER_G(0, 0)<8;8,1> 
0.0039216f + mul.sat (16) fBUFFER_V(0, 0)<1> fBUFFER_B(0, 0)<8;8,1> 0.0039216f + + mul (16) fBUFFER_R(0, 0)<1> fBUFFER_Y(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_G(0, 0)<1> fBUFFER_U(0, 0)<8;8,1> 255.0f + mul (16) fBUFFER_B(0, 0)<1> fBUFFER_V(0, 0)<8;8,1> 255.0f + + mov (16) wTempR(0, 0)<2> fBUFFER_R(0, 0)<8;8,1> + mov (16) wTempG(0, 0)<2> fBUFFER_G(0, 0)<8;8,1> + mov (16) wTempB(0, 0)<2> fBUFFER_B(0, 0)<8;8,1> + mov (16) r[a0.1,97]<2>:ub ubTempR(0, 0)<32;8,4> + mov (16) r[a0.2,97]<2>:ub ubTempG(0, 0)<32;8,4> + mov (16) r[a0.0,97]<2>:ub ubTempB(0, 0)<32;8,4> + diff --git a/src/shaders/post_processing/gen8/pa_to_pa.asm b/src/shaders/post_processing/gen8/pa_to_pa.asm new file mode 100644 index 0000000..44e3b35 --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pa.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel YUY2_TO_NV12 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_VYUA.g8a" +#include "PA_AVS_Buf_0.g8a" +#include "PA_AVS_Buf_1.g8a" +#include "PA_AVS_Buf_2.g8a" +#include "PA_AVS_Buf_3.g8a" +#include "Save_AVS_PA.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pa_to_pa.g8b b/src/shaders/post_processing/gen8/pa_to_pa.g8b new file mode 100644 index 0000000..76fe27a --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pa.g8b @@ -0,0 +1,279 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 
0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 
0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x000062ea }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006420 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 
0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22082260, 0x1669005c, 0x03a003a0 }, + { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00010001 }, + { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 }, + { 0x00000401, 0x23680608, 0x00000000, 0x0001001f }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 
0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 
0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 
0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x08000800 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000002 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000006 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 
0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000a }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000e }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 
0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pa_to_pl2.asm b/src/shaders/post_processing/gen8/pa_to_pl2.asm new file mode 100644 index 0000000..adc81fd --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pl2.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel YUY2_TO_NV12 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_VYUA.g8a" +#include "PA_AVS_Buf_0.g8a" +#include "PA_AVS_Buf_1.g8a" +#include "PA_AVS_Buf_2.g8a" +#include "PA_AVS_Buf_3.g8a" +#include "Save_AVS_NV12.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pa_to_pl2.g8b b/src/shaders/post_processing/gen8/pa_to_pl2.g8b new file mode 100644 index 0000000..3282c51 --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pl2.g8b @@ -0,0 +1,236 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 
0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 
0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x000062ea }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006420 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 
0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 }, + { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 
0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 
0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 
0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 
0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pa_to_pl3.asm b/src/shaders/post_processing/gen8/pa_to_pl3.asm new file mode 100644 index 0000000..44c7f9e --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pl3.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel YUY2_TO_NV12 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_VYUA.g8a" +#include "PA_AVS_Buf_0.g8a" +#include "PA_AVS_Buf_1.g8a" +#include "PA_AVS_Buf_2.g8a" +#include "PA_AVS_Buf_3.g8a" +#include "Save_AVS_PL3.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pa_to_pl3.g8b b/src/shaders/post_processing/gen8/pa_to_pl3.g8b new file mode 100644 index 0000000..3d1d087 --- /dev/null +++ b/src/shaders/post_processing/gen8/pa_to_pl3.g8b @@ -0,0 +1,189 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 
0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 
0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x000062ea }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006420 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 
0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00200208, 0x24a01a28, 0x1e4500e0, 0x00010001 }, + { 0x00200208, 0x25c01a28, 0x1e4500e0, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x00070007 }, + { 0x00000401, 0x25c80608, 0x00000000, 0x00070007 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 
0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24c82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x25e02288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x25e82288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24d02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24d82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x25f02288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x25f82288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 
0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24e02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24e82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x26002288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x26082288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24f02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24f82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x26102288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x26182288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8019 }, + { 0x0c000031, 0x20002220, 0x060005c0, 0x060a801a }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pl2_to_pa.asm b/src/shaders/post_processing/gen8/pl2_to_pa.asm new file mode 100644 index 0000000..55d9ced --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pa.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL2_TO_PA +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL2.g8a" +#include "PL2_AVS_Buf_0.g8a" +#include "PL2_AVS_Buf_1.g8a" +#include "PL2_AVS_Buf_2.g8a" +#include "PL2_AVS_Buf_3.g8a" +#include "Save_AVS_PA.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl2_to_pa.g8b b/src/shaders/post_processing/gen8/pl2_to_pa.g8b new file mode 100644 index 0000000..0c0cda1 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pa.g8b @@ -0,0 +1,287 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 
0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 
0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 
0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22082260, 0x1669005c, 0x03a003a0 }, + { 0x00000209, 0x23601a28, 
0x1e0000e0, 0x00010001 }, + { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 }, + { 0x00000401, 0x23680608, 0x00000000, 0x0001001f }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 
0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 
0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x08000800 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000002 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 
0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000006 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000a }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 
0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000e }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pl2_to_pl2.asm b/src/shaders/post_processing/gen8/pl2_to_pl2.asm new file mode 100644 index 0000000..0281854 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pl2.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL2_TO_PL2 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL2.g8a" +#include "PL2_AVS_Buf_0.g8a" +#include "PL2_AVS_Buf_1.g8a" +#include "PL2_AVS_Buf_2.g8a" +#include "PL2_AVS_Buf_3.g8a" +#include "Save_AVS_NV12.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git 
a/src/shaders/post_processing/gen8/pl2_to_pl2.g8b b/src/shaders/post_processing/gen8/pl2_to_pl2.g8b new file mode 100644 index 0000000..fa72882 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pl2.g8b @@ -0,0 +1,244 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 
0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 
0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 
0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 }, + { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 
0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 
0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 
0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pl2_to_pl3.asm b/src/shaders/post_processing/gen8/pl2_to_pl3.asm new file mode 100644 index 0000000..042a834 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pl3.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL2_TO_PL3 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL2.g8a" +#include "PL2_AVS_Buf_0.g8a" +#include "PL2_AVS_Buf_1.g8a" +#include "PL2_AVS_Buf_2.g8a" +#include "PL2_AVS_Buf_3.g8a" +#include "Save_AVS_PL3.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff 
--git a/src/shaders/post_processing/gen8/pl2_to_pl3.g8b b/src/shaders/post_processing/gen8/pl2_to_pl3.g8b new file mode 100644 index 0000000..28a951c --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_pl3.g8b @@ -0,0 +1,197 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 
0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 
0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 
0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00200208, 0x24a01a28, 0x1e4500e0, 0x00010001 }, + { 0x00200208, 0x25c01a28, 0x1e4500e0, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x00070007 }, + { 0x00000401, 0x25c80608, 0x00000000, 0x00070007 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 
0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24c82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x25e02288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x25e82288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24d02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24d82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x25f02288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x25f82288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24e02288, 
0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24e82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x26002288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x26082288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24f02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24f82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x26102288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x26182288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8019 }, + { 0x0c000031, 0x20002220, 0x060005c0, 0x060a801a }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pl2_to_rgbx.asm b/src/shaders/post_processing/gen8/pl2_to_rgbx.asm new file mode 100644 index 0000000..58a5204 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_rgbx.asm @@ -0,0 +1,18 @@ +// Module name: AVS +.kernel PL2_TO_PL2 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL2.g8a" +#include "PL2_AVS_Buf_0.g8a" +#include "PL2_AVS_Buf_1.g8a" +#include "PL2_AVS_Buf_2.g8a" +#include "PL2_AVS_Buf_3.g8a" +#include "YUV_to_RGB.g8a" +#include "Save_AVS_RGBX.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b b/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b new file mode 100644 index 0000000..9ee29c2 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl2_to_rgbx.g8b @@ -0,0 +1,738 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 
0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 
0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 
0x0000a000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x048eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000a000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x22001240, 0x006902c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8411, 
0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 
0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 
0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 }, + { 0x00400040, 0x22001240, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8211, 
0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 
0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 
0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 }, + { 0x00400040, 0x22001240, 0x166902c0, 
0x04000400 }, + { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 
0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 
0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4612288, 0x00cf0580, 
0x00000000 }, + { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 }, + { 0x00400040, 0x22001240, 0x166902c0, 0x06000600 }, + { 0x00600001, 0x248022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8011, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8031, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 
0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8051, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 
0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 0x00000000 }, + { 0x00800001, 0xc2412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x248022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x24a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x24c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x24e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x250022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x252022e8, 0x00ae8071, 0x00000000 }, + { 0x00800040, 0x24c03ae8, 0x3e8d04c0, 0xc3000000 }, + { 0x00800040, 0x25003ae8, 0x3e8d0500, 0xc3000000 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x23803ae8, 0x3e8d0500, 0x3fb374bc }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d04c0, 0xbeb020c5 }, + { 0x00600048, 0x24203ae0, 0x3e8d04e0, 0xbeb020c5 }, + { 0x00800048, 0x23c03ae8, 0x3e8d0500, 0xbf36c8b4 }, + { 0x00800001, 0x24003ae0, 0x008d0480, 0x00000000 }, + { 0x00800048, 0x24003ae8, 0x3e8d04c0, 0x3fe2d0e5 }, + { 0x80800041, 0x24803ae8, 0x3e8d0380, 0x3b8080c4 }, + { 0x80800041, 0x24c03ae8, 0x3e8d03c0, 0x3b8080c4 }, + { 0x80800041, 0x25003ae8, 0x3e8d0400, 0x3b8080c4 }, + { 0x00800041, 0x23803ae8, 0x3e8d0480, 0x437f0000 }, + { 0x00800041, 0x23c03ae8, 0x3e8d04c0, 0x437f0000 }, + { 0x00800041, 0x24003ae8, 0x3e8d0500, 0x437f0000 }, + { 0x00800001, 0x45403a68, 0x008d0380, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d03c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0400, 
0x00000000 }, + { 0x00800001, 0xc2612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xc4612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc0612288, 0x00cf05c0, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x000062ea }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x02800005, 0x20001260, 0x1e000046, 0x00010001 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00010001, 0x22201048, 0x00000200, 0x00000000 }, + { 0x00010001, 0x22001040, 0x00000204, 0x00000000 }, + { 0x00010001, 0x22041240, 0x00000220, 0x00000000 }, + { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00020002 }, + { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 }, + { 0x00000401, 0x23680608, 0x00000000, 0x0003001f }, + { 0x00400001, 0x22081040, 0x00690200, 0x00000000 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x26e00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x23e00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25000208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x26200208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x27400208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a00a28, 0x0e000360, 0x00000020 }, + { 0x00000040, 0x25c40a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x26e40a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x26e00a28, 0x0e000360, 0x00000020 }, + { 0x00600001, 0x63a02288, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x63a12288, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x63a22288, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x63a32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x63c02288, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x63c12288, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x63c22288, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x63c32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64c02288, 0x00ae8011, 0x00000000 }, + { 0x00600001, 0x64c12288, 0x00ae8211, 
0x00000000 }, + { 0x00600001, 0x64c22288, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x64c32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64e02288, 0x00ae8031, 0x00000000 }, + { 0x00600001, 0x64e12288, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x64e22288, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x64e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x63e02288, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x63e12288, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x63e22288, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x63e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64002288, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x64012288, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x64022288, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x64032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x65002288, 0x00ae8051, 0x00000000 }, + { 0x00600001, 0x65012288, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x65022288, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x65032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x65202288, 0x00ae8071, 0x00000000 }, + { 0x00600001, 0x65212288, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x65222288, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x65232288, 0x0000005f, 0x00000000 }, + { 0x0c800031, 0x20002220, 0x06000380, 0x0a0a8018 }, + { 0x0c800031, 0x20002220, 0x060004a0, 0x0a0a8018 }, + { 0x00400040, 0x22001040, 0x16690208, 0x02000200 }, + { 0x00600001, 0x65e02288, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x65e12288, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x65e22288, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x65e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66002288, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x66012288, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x66022288, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x66032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67002288, 0x00ae8011, 0x00000000 }, + { 0x00600001, 0x67012288, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x67022288, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x67032288, 0x0000005f, 
0x00000000 }, + { 0x00600001, 0x67202288, 0x00ae8031, 0x00000000 }, + { 0x00600001, 0x67212288, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x67222288, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x67232288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66202288, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x66212288, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x66222288, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x66232288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66402288, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x66412288, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x66422288, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x66432288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67402288, 0x00ae8051, 0x00000000 }, + { 0x00600001, 0x67412288, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x67422288, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x67432288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67602288, 0x00ae8071, 0x00000000 }, + { 0x00600001, 0x67612288, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x67622288, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x67632288, 0x0000005f, 0x00000000 }, + { 0x0c800031, 0x20002220, 0x060005c0, 0x0a0a8018 }, + { 0x0c800031, 0x20002220, 0x060006e0, 0x0a0a8018 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x26e00208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x24a00a28, 0x0e000360, 0x00000020 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x25c40a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x26e40a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x26e00a28, 0x0e000360, 0x00000020 }, + { 0x00400040, 0x22001040, 0x16690208, 0x04000400 }, + { 0x00600001, 0x63a02288, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x63a12288, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x63a22288, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x63a32288, 0x0000005f, 
0x00000000 }, + { 0x00600001, 0x63c02288, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x63c12288, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x63c22288, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x63c32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64c02288, 0x00ae8011, 0x00000000 }, + { 0x00600001, 0x64c12288, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x64c22288, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x64c32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64e02288, 0x00ae8031, 0x00000000 }, + { 0x00600001, 0x64e12288, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x64e22288, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x64e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x63e02288, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x63e12288, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x63e22288, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x63e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x64002288, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x64012288, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x64022288, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x64032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x65002288, 0x00ae8051, 0x00000000 }, + { 0x00600001, 0x65012288, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x65022288, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x65032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x65202288, 0x00ae8071, 0x00000000 }, + { 0x00600001, 0x65212288, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x65222288, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x65232288, 0x0000005f, 0x00000000 }, + { 0x0c800031, 0x20002220, 0x06000380, 0x0a0a8018 }, + { 0x0c800031, 0x20002220, 0x060004a0, 0x0a0a8018 }, + { 0x00400040, 0x22001040, 0x16690208, 0x06000600 }, + { 0x00600001, 0x65e02288, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x65e12288, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x65e22288, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x65e32288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66002288, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x66012288, 0x00ae8221, 
0x00000000 }, + { 0x00600001, 0x66022288, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x66032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67002288, 0x00ae8011, 0x00000000 }, + { 0x00600001, 0x67012288, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x67022288, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x67032288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67202288, 0x00ae8031, 0x00000000 }, + { 0x00600001, 0x67212288, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x67222288, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x67232288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66202288, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x66212288, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x66222288, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x66232288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x66402288, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x66412288, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x66422288, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x66432288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67402288, 0x00ae8051, 0x00000000 }, + { 0x00600001, 0x67412288, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x67422288, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x67432288, 0x0000005f, 0x00000000 }, + { 0x00600001, 0x67602288, 0x00ae8071, 0x00000000 }, + { 0x00600001, 0x67612288, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x67622288, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x67632288, 0x0000005f, 0x00000000 }, + { 0x0c800031, 0x20002220, 0x060005c0, 0x0a0a8018 }, + { 0x0c800031, 0x20002220, 0x060006e0, 0x0a0a8018 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pl3_to_pa.asm b/src/shaders/post_processing/gen8/pl3_to_pa.asm new file mode 100644 index 0000000..acb7670 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pa.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL3_TO_PL3 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include 
"Set_AVS_Buf_0123_PL3.g8a" +#include "PL3_AVS_Buf_0.g8a" +#include "PL3_AVS_Buf_1.g8a" +#include "PL3_AVS_Buf_2.g8a" +#include "PL3_AVS_Buf_3.g8a" +#include "Save_AVS_PA.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl3_to_pa.g8b b/src/shaders/post_processing/gen8/pl3_to_pa.g8b new file mode 100644 index 0000000..d6798c2 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pa.g8b @@ -0,0 +1,303 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 
0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 
0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x29002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 
0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22082260, 0x1669005c, 0x03a003a0 }, + { 0x00000209, 0x23601a28, 0x1e0000e0, 0x00010001 }, + { 0x00000601, 0x23641a28, 0x000000e2, 0x00000000 }, + { 0x00000401, 0x23680608, 0x00000000, 0x0001001f }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 
0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x02000200 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 
0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x80400040, 0xc0001248, 0x168a8000, 0x00800080 }, + { 0x80400040, 0xc0201248, 0x168a8020, 0x00800080 }, + { 0x80400040, 0xc0401248, 0x168a8040, 0x00800080 }, + { 0x80400040, 0xc0601248, 0x168a8060, 0x00800080 }, + { 0x80600040, 0xa2001248, 0x168d8200, 0x00800080 }, + { 0x80600040, 0xa2201248, 0x168d8220, 0x00800080 }, + { 0x80600040, 
0xa2401248, 0x168d8240, 0x00800080 }, + { 0x80600040, 0xa2601248, 0x168d8260, 0x00800080 }, + { 0x80400040, 0xc4001248, 0x168a8400, 0x00800080 }, + { 0x80400040, 0xc4201248, 0x168a8420, 0x00800080 }, + { 0x80400040, 0xc4401248, 0x168a8440, 0x00800080 }, + { 0x80400040, 0xc4601248, 0x168a8460, 0x00800080 }, + { 0x80400040, 0xc0101248, 0x168a8010, 0x00800080 }, + { 0x80400040, 0xc0301248, 0x168a8030, 0x00800080 }, + { 0x80400040, 0xc0501248, 0x168a8050, 0x00800080 }, + { 0x80400040, 0xc0701248, 0x168a8070, 0x00800080 }, + { 0x80600040, 0xa2101248, 0x168d8210, 0x00800080 }, + { 0x80600040, 0xa2301248, 0x168d8230, 0x00800080 }, + { 0x80600040, 0xa2501248, 0x168d8250, 0x00800080 }, + { 0x80600040, 0xa2701248, 0x168d8270, 0x00800080 }, + { 0x80400040, 0xc4101248, 0x168a8410, 0x00800080 }, + { 0x80400040, 0xc4301248, 0x168a8430, 0x00800080 }, + { 0x80400040, 0xc4501248, 0x168a8450, 0x00800080 }, + { 0x80400040, 0xc4701248, 0x168a8470, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x08000800 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000002 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 
0x22001a40, 0x166902c0, 0x02000200 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000004 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x00000006 }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x00000008 }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000a }, + { 0x00600201, 0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x06000600 }, + { 0x00000040, 0x23840a28, 0x0e000364, 0x0000000c }, + { 0x00000040, 0x24a40a28, 0x0e000364, 0x0000000e }, + { 0x00600201, 
0xec002288, 0x00cf8001, 0x00000000 }, + { 0x00600201, 0xec202288, 0x00cf8021, 0x00000000 }, + { 0x00800601, 0xc8002288, 0x00d28201, 0x00000000 }, + { 0x00800601, 0xc8202288, 0x00d28221, 0x00000000 }, + { 0x00600401, 0xea002288, 0x00cf8401, 0x00000000 }, + { 0x00600401, 0xea202288, 0x00cf8421, 0x00000000 }, + { 0x00600201, 0xed202288, 0x00cf8041, 0x00000000 }, + { 0x00600201, 0xed402288, 0x00cf8061, 0x00000000 }, + { 0x00800601, 0xc9202288, 0x00d28241, 0x00000000 }, + { 0x00800601, 0xc9402288, 0x00d28261, 0x00000000 }, + { 0x00600401, 0xeb202288, 0x00cf8441, 0x00000000 }, + { 0x00600401, 0xeb402288, 0x00cf8461, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x060a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8018 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pl3_to_pl2.asm b/src/shaders/post_processing/gen8/pl3_to_pl2.asm new file mode 100644 index 0000000..713cb97 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl2.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel PL3_TO_PL2 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL3.g8a" +#include "PL3_AVS_Buf_0.g8a" +#include "PL3_AVS_Buf_1.g8a" +#include "PL3_AVS_Buf_2.g8a" +#include "PL3_AVS_Buf_3.g8a" +#include "Save_AVS_NV12.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl3_to_pl2.g8b b/src/shaders/post_processing/gen8/pl3_to_pl2.g8b new file mode 100644 index 0000000..9a141e7 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl2.g8b @@ -0,0 +1,260 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 
0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 
0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 
0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x29002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 
0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 }, + { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 
0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 
0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 }, + { 
0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x45112288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/pl3_to_pl3.asm b/src/shaders/post_processing/gen8/pl3_to_pl3.asm new file mode 100644 index 0000000..f6a2a76 --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl3.asm @@ -0,0 +1,17 @@ +// Module name: AVS +.kernel 
PL3_TO_PL3 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_PL3.g8a" +#include "PL3_AVS_Buf_0.g8a" +#include "PL3_AVS_Buf_1.g8a" +#include "PL3_AVS_Buf_2.g8a" +#include "PL3_AVS_Buf_3.g8a" +#include "Save_AVS_PL3.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/pl3_to_pl3.g8b b/src/shaders/post_processing/gen8/pl3_to_pl3.g8b new file mode 100644 index 0000000..67ac99c --- /dev/null +++ b/src/shaders/post_processing/gen8/pl3_to_pl3.g8b @@ -0,0 +1,213 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 
0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 
0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x28802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x29002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2a802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2b002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 
0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2c802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2d002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000d000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb001 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2e802248, 0x00000200, 0x00000200 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x044eb002 }, + { 0x00000001, 0x22080608, 0x00000000, 0x0000e000 }, + { 0x02000031, 0x2f002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x25c00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00200208, 0x24a01a28, 0x1e4500e0, 0x00010001 }, + { 0x00200208, 0x25c01a28, 0x1e4500e0, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x00070007 }, + { 0x00000401, 0x25c80608, 0x00000000, 
0x00070007 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x80600040, 0xc4001248, 0x16ae8400, 0x00800080 }, + { 0x80600040, 0xc4401248, 0x16ae8440, 0x00800080 }, + { 0x80600040, 0xc0001248, 0x16ae8000, 0x00800080 }, + { 0x80600040, 0xc0401248, 0x16ae8040, 0x00800080 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x80600040, 0xcc001248, 0x16ae8c00, 0x00800080 }, + { 0x80600040, 0xcc401248, 0x16ae8c40, 0x00800080 }, + { 0x80600040, 0xc8001248, 0x16ae8800, 0x00800080 }, + { 0x80600040, 0xc8401248, 0x16ae8840, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 
0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24c82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x25e02288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x25e82288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24d02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24d82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x25f02288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x25f82288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x24e02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x24e82288, 0x00cf8441, 0x00000000 }, + { 0x00600201, 0x26002288, 0x00cf8001, 0x00000000 }, + { 0x00600601, 0x26082288, 0x00cf8041, 0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600601, 0x24f02288, 0x00cf8c01, 0x00000000 }, + { 0x00600401, 0x24f82288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x26102288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x26182288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x060a8019 }, + { 0x0c000031, 0x20002220, 0x060005c0, 0x060a801a }, + { 0x00600001, 0x2fe00208, 0x008d0000, 
0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/rgbx_to_nv12.asm b/src/shaders/post_processing/gen8/rgbx_to_nv12.asm new file mode 100644 index 0000000..14baafe --- /dev/null +++ b/src/shaders/post_processing/gen8/rgbx_to_nv12.asm @@ -0,0 +1,18 @@ +// Module name: AVS +.kernel RGBX_TO_NV12 +.code + +#include "VP_Setup.g8a" +#include "Set_Layer_0.g8a" +#include "Set_AVS_Buf_0123_BGRA.g8a" +#include "PA_AVS_Buf_0.g8a" +#include "PA_AVS_Buf_1.g8a" +#include "PA_AVS_Buf_2.g8a" +#include "PA_AVS_Buf_3.g8a" +#include "RGB_to_YUV.g8a" +#include "Save_AVS_NV12.g8a" +#include "EOT.g8a" + +.end_code + +.end_kernel diff --git a/src/shaders/post_processing/gen8/rgbx_to_nv12.g8b b/src/shaders/post_processing/gen8/rgbx_to_nv12.g8b new file mode 100644 index 0000000..4cc113b --- /dev/null +++ b/src/shaders/post_processing/gen8/rgbx_to_nv12.g8b @@ -0,0 +1,661 @@ + { 0x00600001, 0x23600208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23200208, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x23400208, 0x008d0000, 0x00000000 }, + { 0x00600041, 0x20603ae8, 0x3a8d0060, 0x000000f0 }, + { 0x00200001, 0x21141ae8, 0x004500e0, 0x00000000 }, + { 0x01000010, 0x20002220, 0x1600005a, 0x00010001 }, + { 0x00000008, 0x22201248, 0x16000044, 0x00000000 }, + { 0x00000005, 0x22201248, 0x16000220, 0x00030003 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00010001 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000090 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00020002 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x000000f0 }, + { 0x01000010, 0x20001261, 0x16000220, 0x00030003 }, + { 0x00010020, 0x34000005, 0x0e001400, 0x00000180 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000114 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000118 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x000001a0 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 
0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000118 }, + { 0x00000001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000120 }, + { 0x00110001, 0x220012e8, 0x00000040, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004114, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00000040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00000040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000220 }, + { 0x00000020, 0x34000004, 0x0e001400, 0x00000070 }, + { 0x00110001, 0x220012e8, 0x00000042, 0x00000000 }, + { 0x00110040, 0x22203ae8, 0x3a004118, 0x00000200 }, + { 0x00110040, 0x22203ae8, 0x3e000220, 0xc1800000 }, + { 0x00110001, 0x24003ae0, 0x000000c0, 0x00000000 }, + { 0x00110048, 0x20c03ae8, 0x3a000060, 0x00000220 }, + { 0x00000001, 0x24003ae0, 0x000000a0, 0x00000000 }, + { 0x00000048, 0x20a03ae8, 0x3a000080, 0x00000114 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00800001, 0x22a01648, 0x10000000, 0xffffffff }, + { 0x00000005, 0x23021288, 0x16000044, 0x00030003 }, + { 0x00000001, 0x23203ae8, 0x000000f8, 0x00000000 }, + { 0x00000001, 0x23383ae8, 0x000000f4, 0x00000000 }, + { 0x00000001, 0x23303ae8, 0x00000060, 0x00000000 }, + { 0x00000001, 0x23343ae8, 0x00000080, 0x00000000 }, + { 0x00000001, 0x23283ae8, 0x000000c0, 0x00000000 }, + { 0x00000001, 0x232c3ae8, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x233c0608, 0x00000000, 0x00000000 }, + { 0x00000040, 0x233c0208, 0x0600033c, 0x08000000 }, + { 0x00000001, 0x24083ae0, 0x000000c0, 
0x00000000 }, + { 0x00000048, 0x24083ae0, 0x3e000060, 0x41000000 }, + { 0x00000248, 0x22e83ae8, 0x3e0000f4, 0x41e00000 }, + { 0x00000641, 0x22e43ae8, 0x3e000080, 0x40800000 }, + { 0x00000001, 0x24103ae0, 0x00000060, 0x00000000 }, + { 0x00000648, 0x22f03ae8, 0x3e0000f4, 0x41000000 }, + { 0x00000401, 0x22f40608, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006ea2 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00460046 }, + { 0x00400009, 0x22c01868, 0x16690400, 0x00050005 }, + { 0x00400001, 0x24003660, 0x30000000, 0x00006204 }, + { 0x00400040, 0x24001860, 0x16690400, 0x00400040 }, + { 0x00400209, 0x22401868, 0x16690400, 0x00050005 }, + { 0x00000401, 0x22500608, 0x00000000, 0x01000100 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x28002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000001 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2a002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000002 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2c002248, 0x00000200, 
0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22d00608, 0x00000000, 0x00400040 }, + { 0x00000001, 0x220c0208, 0x0000000c, 0x00000000 }, + { 0x00000040, 0x22000200, 0x060002f4, 0x050eb000 }, + { 0x00000001, 0x22080608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23240608, 0x00000000, 0x00000003 }, + { 0x00600001, 0x22200208, 0x008d0320, 0x00000000 }, + { 0x02000031, 0x2e002248, 0x00000200, 0x00000200 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00400001, 0x22001240, 0x006902c0, 0x00000000 }, + { 0x00400001, 0x22081240, 0x006902c0, 0x00000000 }, + { 0x02800005, 0x20001260, 0x1e000040, 0x00010001 }, + { 0x00010001, 0x22201048, 0x00000200, 0x00000000 }, + { 0x00010001, 0x22001040, 0x00000202, 0x00000000 }, + { 0x00010001, 0x22021240, 0x00000220, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac001648, 0x10000000, 
0x00000000 }, + { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 
0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 
0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 }, + { 0x00600040, 0x22001040, 0x168d0200, 0x02000200 }, + { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8231, 
0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 
0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca612288, 0x00cf0540, 
0x00000000 }, + { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 }, + { 0x00600040, 0x22001040, 0x168d0200, 0x02000200 }, + { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8401, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 
0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 
0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc412288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 }, + { 0x00600040, 0x22001040, 0x168d0200, 0x02000200 }, + { 0x00600001, 0x238022e8, 0x00ae8201, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8211, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8401, 
0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8411, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8001, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8011, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8001648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca012288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc012288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8012288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8221, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8231, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8421, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8431, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8021, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8031, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 
0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8201648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca212288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc212288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8212288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8241, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8251, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8441, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8451, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8041, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8051, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8401648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca412288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc412288, 0x00cf0580, 
0x00000000 }, + { 0x00800001, 0xc8412288, 0x00cf05c0, 0x00000000 }, + { 0x00600001, 0x238022e8, 0x00ae8261, 0x00000000 }, + { 0x00600001, 0x23a022e8, 0x00ae8271, 0x00000000 }, + { 0x00600001, 0x23c022e8, 0x00ae8461, 0x00000000 }, + { 0x00600001, 0x23e022e8, 0x00ae8471, 0x00000000 }, + { 0x00600001, 0x240022e8, 0x00ae8061, 0x00000000 }, + { 0x00600001, 0x242022e8, 0x00ae8071, 0x00000000 }, + { 0x00800041, 0x24003ae0, 0x3e8d0380, 0x3e991687 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0x3f1645a2 }, + { 0x00800048, 0x24803ae8, 0x3e8d0400, 0x3de978d5 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0xbe2d0e56 }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbea978d5 }, + { 0x00800048, 0x24c03ae8, 0x3e8d0400, 0x3eff7cee }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x43000000 }, + { 0x00800048, 0x24003ae0, 0x3e8d0380, 0x3eff7cee }, + { 0x00800048, 0x24003ae0, 0x3e8d03c0, 0xbed60419 }, + { 0x00800048, 0x25003ae8, 0x3e8d0400, 0xbda6809d }, + { 0x00800001, 0x45403a68, 0x008d0480, 0x00000000 }, + { 0x00800001, 0x45803a68, 0x008d04c0, 0x00000000 }, + { 0x00800001, 0x45c03a68, 0x008d0500, 0x00000000 }, + { 0x00800001, 0xaa601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xac601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xa8601648, 0x10000000, 0x00000000 }, + { 0x00800001, 0xca612288, 0x00cf0540, 0x00000000 }, + { 0x00800001, 0xcc612288, 0x00cf0580, 0x00000000 }, + { 0x00800001, 0xc8612288, 0x00cf05c0, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00600001, 0x23800208, 0x008d0360, 0x00000000 }, + { 0x00600001, 0x24a00208, 0x008d0360, 0x00000000 }, + { 0x00200201, 0x23801a28, 0x004500e0, 0x00000000 }, + { 0x00000201, 0x24a01a28, 0x000000e0, 0x00000000 }, + { 0x00000608, 0x24a41a28, 0x1e0000e2, 0x00010001 }, + { 0x00000401, 0x23880608, 0x00000000, 0x000f000f }, + { 0x00000401, 0x24a80608, 0x00000000, 0x0007000f }, + { 0x80800040, 0xa2001248, 0x16b18200, 
0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 
0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x80800040, 0xa2001248, 0x16b18200, 0x00800080 }, + { 0x80800040, 0xa2201248, 0x16b18220, 0x00800080 }, + { 0x80800040, 0xa2401248, 0x16b18240, 0x00800080 }, + { 0x80800040, 0xa2601248, 0x16b18260, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8400, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8420, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8440, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8460, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xc4001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xc4401248, 0x16ae0600, 0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8000, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8020, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8040, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8060, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc0001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc0401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x80800040, 0xaa001248, 0x16b18a00, 0x00800080 }, + { 0x80800040, 0xaa201248, 0x16b18a20, 0x00800080 }, + { 0x80800040, 0xaa401248, 0x16b18a40, 0x00800080 }, + { 0x80800040, 0xaa601248, 0x16b18a60, 0x00800080 }, + { 0x00600008, 0x45c01248, 0x1eae8c00, 0x00010001 }, + { 0x00600008, 0x45e01248, 0x1eae8c20, 0x00010001 }, + { 0x00600008, 0x46001248, 0x1eae8c40, 0x00010001 }, + { 0x00600008, 0x46201248, 0x1eae8c60, 0x00010001 }, + { 0x00600040, 0x45c01248, 0x12ae05c0, 0x00ae05e0 }, + { 0x80600040, 0xcc001248, 0x16ae05c0, 0x00800080 }, + { 0x00600040, 0x46001248, 0x12ae0600, 0x00ae0620 }, + { 0x80600040, 0xcc401248, 0x16ae0600, 
0x00800080 }, + { 0x00600008, 0x46401248, 0x1eae8800, 0x00010001 }, + { 0x00600008, 0x46601248, 0x1eae8820, 0x00010001 }, + { 0x00600008, 0x46801248, 0x1eae8840, 0x00010001 }, + { 0x00600008, 0x46a01248, 0x1eae8860, 0x00010001 }, + { 0x00600040, 0x46401248, 0x12ae0640, 0x00ae0660 }, + { 0x80600040, 0xc8001248, 0x16ae0640, 0x00800080 }, + { 0x00600040, 0x46801248, 0x12ae0680, 0x00ae06a0 }, + { 0x80600040, 0xc8401248, 0x16ae0680, 0x00800080 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x02000200 }, + { 0x00800201, 0x23a02288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x23b02288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x23c02288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x23d02288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x44c02288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x44d02288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x44c12288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x44d12288, 0x00cf8041, 0x00000000 }, + { 0x00400040, 0x22001a40, 0x166902c0, 0x04000400 }, + { 0x00800201, 0x23e02288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x23f02288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24002288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24102288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x44e02288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x44f02288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x44e12288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x44f12288, 0x00cf8841, 0x00000000 }, + { 0x00400040, 0x22081a40, 0x166902c0, 0x06000600 }, + { 0x00800201, 0x24202288, 0x00d28201, 0x00000000 }, + { 0x00800401, 0x24302288, 0x00d28221, 0x00000000 }, + { 0x00800201, 0x24402288, 0x00d28241, 0x00000000 }, + { 0x00800401, 0x24502288, 0x00d28261, 0x00000000 }, + { 0x00600201, 0x45002288, 0x00cf8401, 0x00000000 }, + { 0x00600601, 0x45102288, 0x00cf8441, 0x00000000 }, + { 0x00600601, 0x45012288, 0x00cf8001, 0x00000000 }, + { 0x00600401, 0x45112288, 0x00cf8041, 
0x00000000 }, + { 0x00800201, 0x24602288, 0x00d28a01, 0x00000000 }, + { 0x00800401, 0x24702288, 0x00d28a21, 0x00000000 }, + { 0x00800201, 0x24802288, 0x00d28a41, 0x00000000 }, + { 0x00800401, 0x24902288, 0x00d28a61, 0x00000000 }, + { 0x00600201, 0x45202288, 0x00cf8c01, 0x00000000 }, + { 0x00600601, 0x45302288, 0x00cf8c41, 0x00000000 }, + { 0x00600601, 0x45212288, 0x00cf8801, 0x00000000 }, + { 0x00600401, 0x45312288, 0x00cf8841, 0x00000000 }, + { 0x0c000031, 0x20002220, 0x06000380, 0x120a8018 }, + { 0x0c000031, 0x20002220, 0x060004a0, 0x0a0a8019 }, + { 0x00600001, 0x2fe00208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x20002220, 0x0e000fe0, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/sharpening_h_blur.g8b b/src/shaders/post_processing/gen8/sharpening_h_blur.g8b new file mode 100644 index 0000000..ffa759b --- /dev/null +++ b/src/shaders/post_processing/gen8/sharpening_h_blur.g8b @@ -0,0 +1,1718 @@ +{ 0x00000001, 0x23401608, 0x00000000, 0x00000000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 0x23441608, 0x00000000, 0x00020002 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0007 }, +{ 0x00000001, 0x20401608, 0x00000000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000340, 0x02490000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x00000001, 0x202c1608, 0x00000000, 0x00040004 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x20240208, 0x1e000020, 0xfffcfffc }, +{ 0x06000010, 0x20000201, 0x16000024, 0x00040004 }, +{ 0x00200001, 0x23283ae8, 0x004500b0, 0x00000000 }, +{ 0x00200001, 0x23083ae8, 0x004500a0, 0x00000000 }, +{ 0x00200001, 0x22e83ae8, 0x00450090, 0x00000000 }, +{ 0x00200001, 0x22c83ae8, 0x00450080, 0x00000000 }, +{ 0x00200001, 0x22a83ae8, 0x00450070, 0x00000000 }, +{ 0x00200001, 0x22883ae8, 0x00450060, 0x00000000 }, +{ 0x00200001, 0x22683ae8, 0x00450050, 0x00000000 }, +{ 0x00200001, 0x22483ae8, 0x00450040, 0x00000000 }, +{ 0x00200001, 0x23383ae8, 0x004500b8, 0x00000000 }, +{ 0x00200001, 
0x23183ae8, 0x004500a8, 0x00000000 }, +{ 0x00200001, 0x22f83ae8, 0x00450098, 0x00000000 }, +{ 0x00200001, 0x22d83ae8, 0x00450088, 0x00000000 }, +{ 0x00200001, 0x22b83ae8, 0x00450078, 0x00000000 }, +{ 0x00200001, 0x22983ae8, 0x00450068, 0x00000000 }, +{ 0x00200001, 0x22783ae8, 0x00450058, 0x00000000 }, +{ 0x00200001, 0x22583ae8, 0x00450048, 0x00000000 }, +{ 0x00010020, 0x34000005, 0x0e001400, 0x000067d0 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000340, 0x02490000 }, +{ 0x00200001, 0x22603ae8, 0x00450268, 0x00000000 }, +{ 0x00200001, 0x22403ae8, 0x00450248, 0x00000000 }, +{ 0x00200001, 0x22803ae8, 0x00450288, 0x00000000 }, +{ 0x00200001, 0x22c03ae8, 0x004502c8, 0x00000000 }, +{ 0x00200001, 0x22a03ae8, 0x004502a8, 0x00000000 }, +{ 0x00200001, 0x23203ae8, 0x00450328, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0007 }, +{ 0x00200001, 0x23003ae8, 0x00450308, 0x00000000 }, +{ 0x00200001, 0x22e03ae8, 0x004502e8, 0x00000000 }, +{ 0x00000040, 0x20400208, 0x1600002c, 0x00040004 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00200001, 0x22703ae8, 0x00450278, 0x00000000 }, +{ 0x00200001, 0x22503ae8, 0x00450258, 0x00000000 }, +{ 0x00200001, 0x22683ae8, 0x00450050, 0x00000000 }, +{ 0x00200001, 0x22483ae8, 0x00450040, 0x00000000 }, +{ 0x00200001, 0x22783ae8, 0x00450058, 0x00000000 }, +{ 0x00200001, 0x22583ae8, 0x00450048, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000240, 0x00000000 }, +{ 0x00200001, 0x22903ae8, 0x00450298, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000250, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000260, 0x00000000 }, +{ 0x00200001, 0x22883ae8, 0x00450060, 0x00000000 }, +{ 0x00200001, 0x22d03ae8, 0x004502d8, 0x00000000 }, +{ 0x00200001, 0x22b03ae8, 0x004502b8, 0x00000000 }, +{ 0x00200001, 0x22983ae8, 0x00450068, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000270, 0x00000000 }, +{ 0x00200001, 0x22c83ae8, 0x00450080, 0x00000000 }, 
+{ 0x00200001, 0x22a83ae8, 0x00450070, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000280, 0x00000000 }, +{ 0x00200001, 0x22d83ae8, 0x00450088, 0x00000000 }, +{ 0x00200001, 0x22b83ae8, 0x00450078, 0x00000000 }, +{ 0x00200001, 0x23303ae8, 0x00450338, 0x00000000 }, +{ 0x00200001, 0x23103ae8, 0x00450318, 0x00000000 }, +{ 0x00200001, 0x22f03ae8, 0x004502f8, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000290, 0x00000000 }, +{ 0x00200001, 0x22e83ae8, 0x00450090, 0x00000000 }, +{ 0x00200001, 0x23083ae8, 0x004500a0, 0x00000000 }, +{ 0x00200001, 0x23283ae8, 0x004500b0, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a0, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c0, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d0, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b0, 0x00000000 }, +{ 0x00200001, 0x23383ae8, 0x004500b8, 0x00000000 }, +{ 0x00200001, 0x23183ae8, 0x004500a8, 0x00000000 }, +{ 0x00200001, 0x22f83ae8, 0x00450098, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e0, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c1, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000241, 0x00000000 }, +{ 0x00000001, 0x20303ee8, 0x00000000, 0x332bcc77 }, +{ 0x00000001, 0x21403ee8, 0x00000000, 0x3c1d98ad }, +{ 0x00000001, 0x206c22e8, 0x000002f0, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d1, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000251, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000261, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e1, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f1, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000271, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000281, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000301, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000311, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000291, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a1, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000321, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000331, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b1, 
0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x208022e8, 0x00000242, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c2, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000252, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000262, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000272, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000282, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000292, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a2, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x209c22e8, 0x000002b2, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d2, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x00000243, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e2, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000300, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f2, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000253, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000310, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000263, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000302, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000320, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000312, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000273, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000330, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000283, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000322, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x20bc22e8, 0x00000332, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000293, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a3, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x209c22e8, 0x000002b3, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c3, 0x00000000 }, +{ 0x00000001, 0x21503ee8, 0x00000000, 0x3e525448 }, +{ 0x00000001, 0x21603ee8, 0x00000000, 0x3f11e168 }, +{ 0x00000001, 0x20a422e8, 0x000002d3, 0x00000000 }, +{ 0x00000001, 
0x20a822e8, 0x000002e3, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f3, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000303, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20b422e8, 0x00000313, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000323, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000333, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, 
+{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x21703ee8, 0x00000000, 0x3875735f }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 
0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x21803a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x61a00a88, 0x00000180, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c1, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000241, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000242, 0x00000000 }, +{ 0x00000001, 0x61a80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x61b00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x61b80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x61c00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 
0x61c80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x61d00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x61d80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x61e00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x61e80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x61f00a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x61f80a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d1, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000251, 0x00000000 }, +{ 0x00000001, 0x62000a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000261, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e1, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f1, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000271, 0x00000000 }, +{ 0x00000001, 0x62080a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000281, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000301, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000252, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000311, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000291, 0x00000000 }, +{ 0x00000001, 0x62100a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000262, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a1, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000321, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000272, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000331, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b1, 0x00000000 }, +{ 0x00000001, 0x62180a88, 0x00000220, 0x00000000 }, 
+{ 0x00000001, 0x209022e8, 0x00000282, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c2, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000292, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d2, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e2, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f2, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000302, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000312, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a2, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000322, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000332, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b2, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x208022e8, 0x00000243, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c3, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x208422e8, 0x00000253, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d3, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000263, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000273, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e3, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f3, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000283, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000293, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000303, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000313, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a3, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b3, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000323, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000333, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d4, 
0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 
0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, 
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x61a10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x61a90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x61b10a88, 0x00000220, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x61b90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x61c10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x61c90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 
0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x61d10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x61d90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c3, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000243, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c2, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000242, 0x00000000 }, +{ 0x00000001, 0x61e10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x61e90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x61f10a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d3, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000253, 0x00000000 }, +{ 0x00000001, 0x61f90a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d2, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e3, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000263, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000252, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x62010a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000262, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000273, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f3, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e2, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f2, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000303, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000283, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000272, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x62090a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000282, 0x00000000 }, +{ 0x00000001, 
0x209422e8, 0x00000293, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000313, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000302, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000312, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000323, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a3, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000292, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x62110a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a2, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b3, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000333, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000322, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000332, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x205c22e8, 0x000002b2, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x62190a88, 0x00000220, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, 
+{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x208422e8, 0x00000257, 
0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 
0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x41a22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, 
+{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x41aa2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x41b22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x41ba2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x41c22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x41ca2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x41d22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x41da2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x41e22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x41ea2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000244, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000243, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c3, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x41f22288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d4, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x41fa2288, 0x00000024, 
0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000253, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000263, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d3, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x42022288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e3, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000273, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000283, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000314, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f3, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x420a2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000303, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000293, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a3, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000334, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b4, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000313, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x42122288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000323, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x205c22e8, 0x000002b3, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000333, 0x00000000 }, +{ 0x0060015b, 
0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x421a2288, 0x00000024, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, 
+{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 
0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x00000001, 
0x208422e8, 0x0000025b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x41a32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x41ab2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x41b32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x41bb2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x41c32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x41cb2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x41d32288, 0x00000024, 0x00000000 }, 
+{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x41db2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x41e32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x41eb2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c4, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000244, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x41f32288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x41fb2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d4, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000254, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x42032288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000264, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e4, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f4, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000274, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 
0x00000000 }, +{ 0x00000001, 0x420b2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000284, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000304, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000314, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000294, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x42132288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a4, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000324, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000335, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x207c22e8, 0x00000334, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b4, 0x00000000 }, +{ 0x00000001, 0x421b2288, 0x00000024, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x0060015b, 
0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, 
+{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025b, 
0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x00000001, 0x61a40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x61ac0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x61b40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x61bc0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 }, +{ 0x00000001, 
0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x61c40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x61cc0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x61d40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x61dc0a88, 0x00000220, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x61e40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x61ec0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x61f40a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x61fc0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x62040a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x620c0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c5, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000245, 0x00000000 }, +{ 0x00000001, 0x62140a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f6, 0x00000000 }, 
+{ 0x00000001, 0x208c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000336, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b6, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x206422e8, 0x000002d5, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000255, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000265, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e5, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f5, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000275, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000285, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000305, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000315, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000295, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a5, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000325, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000335, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b5, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x621c0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 
0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 
0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 }, 
+{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x208022e8, 0x0000024d, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025d, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026d, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027d, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028d, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029d, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ad, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x20a022e8, 0x000002cd, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bd, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dd, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20a822e8, 0x000002ed, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000246, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c6, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000100, 
0x00000000 }, +{ 0x00000001, 0x61a50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x61ad0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x61b50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x61bd0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x61c50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fd, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x61cd0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030d, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031d, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x61d50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032d, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033d, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x61dd0a88, 0x00000220, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x61e50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x61ed0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x61f50a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x61fd0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000256, 0x00000000 }, +{ 0x00000001, 0x62050a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000266, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000276, 0x00000000 }, +{ 0x00000001, 0x620d0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 
0x205022e8, 0x00000286, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000296, 0x00000000 }, +{ 0x00000001, 0x62150a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d6, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a6, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e6, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b6, 0x00000000 }, +{ 0x00000001, 0x621d0a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f6, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000306, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000337, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b7, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, 
+{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000316, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000326, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000336, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 
0x01472015 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 }, +{ 0x00000001, 
0x20a022e8, 0x000002cc, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024d, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002cd, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025d, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026d, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dd, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ed, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027d, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028d, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fd, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030d, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029d, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ad, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031d, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032d, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bd, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20bc22e8, 0x0000033d, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x208022e8, 0x0000024e, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025e, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002ce, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002de, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026e, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027e, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ee, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fe, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028e, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029e, 0x00000000 }, 
+{ 0x00000001, 0x20b022e8, 0x0000030e, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031e, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ae, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002be, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032e, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033e, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x22203a28, 0x00000100, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x41a62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x41ae2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000108, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204022e8, 0x00000247, 0x00000000 }, +{ 0x00000001, 0x206022e8, 0x000002c7, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x41b62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x41be2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x41c62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x41ce2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x41d62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x41de2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x41e62288, 0x00000024, 
0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x41ee2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x41f62288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x41fe2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204422e8, 0x00000257, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x42062288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x204822e8, 0x00000267, 0x00000000 }, +{ 0x00000001, 0x204c22e8, 0x00000277, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x420e2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205022e8, 0x00000287, 0x00000000 }, +{ 0x00000001, 0x205422e8, 0x00000297, 0x00000000 }, +{ 0x00000001, 0x22203a28, 0x0000013c, 0x00000000 }, +{ 0x00000001, 0x42162288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000220, 0x00000000 }, +{ 0x00000001, 0x205822e8, 0x000002a7, 0x00000000 }, +{ 0x00000001, 0x206422e8, 0x000002d7, 0x00000000 }, +{ 0x00000001, 0x205c22e8, 0x000002b7, 0x00000000 }, +{ 0x00000001, 0x421e2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x206822e8, 0x000002e7, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c8, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x00000248, 0x00000000 }, +{ 0x00000001, 0x206c22e8, 0x000002f7, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d8, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x00000258, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000268, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e8, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f8, 0x00000000 }, +{ 0x00000001, 
0x208c22e8, 0x00000278, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000288, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000308, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000318, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000298, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a8, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000328, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x00000338, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b8, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00000001, 0x208022e8, 0x00000249, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002c9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872003 }, +{ 0x00000001, 0x208422e8, 0x00000259, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x00000269, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x00000279, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x00000289, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x00000299, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002a9, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002b9, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002d9, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x208022e8, 0x0000024a, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002e9, 0x00000000 }, +{ 0x00000001, 0x207022e8, 0x00000307, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002f9, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025a, 0x00000000 }, +{ 0x00000001, 0x207422e8, 0x00000317, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026a, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x00000309, 0x00000000 }, +{ 0x00000001, 0x207822e8, 0x00000327, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x00000319, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027a, 0x00000000 }, +{ 0x00000001, 0x207c22e8, 0x00000337, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028a, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x00000329, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72003 }, +{ 0x00000001, 0x20bc22e8, 0x00000339, 0x00000000 }, 
+{ 0x00000001, 0x209422e8, 0x0000029a, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002aa, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002ca, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002ba, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002da, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x20a822e8, 0x000002ea, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024b, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fa, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026b, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030a, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031a, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032a, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033a, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029b, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ab, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x20a022e8, 0x000002cb, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bb, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00000001, 0x20a422e8, 0x000002db, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002eb, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024c, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025c, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fb, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030b, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031b, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032b, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033b, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 
0x01472016 }, +{ 0x00000001, 0x209822e8, 0x000002ac, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bc, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cc, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002dc, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00000001, 0x208022e8, 0x0000024d, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ec, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fc, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025d, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026d, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030c, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031c, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027d, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028d, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032c, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033c, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029d, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ad, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x00000001, 0x209c22e8, 0x000002bd, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cd, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x20a422e8, 0x000002dd, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ed, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fd, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030d, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031d, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032d, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033d, 0x00000000 }, +{ 0x00000001, 0x208022e8, 0x0000024e, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00000001, 0x20a022e8, 0x000002ce, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025e, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026e, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002de, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ee, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027e, 0x00000000 }, +{ 0x00000001, 
0x209022e8, 0x0000028e, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002fe, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030e, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029e, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002ae, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031e, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032e, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002be, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072017 }, +{ 0x00000001, 0x20bc22e8, 0x0000033e, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472017 }, +{ 0x00000001, 0x208022e8, 0x0000024f, 0x00000000 }, +{ 0x00000001, 0x208422e8, 0x0000025f, 0x00000000 }, +{ 0x00000001, 0x20a022e8, 0x000002cf, 0x00000000 }, +{ 0x00000001, 0x20a422e8, 0x000002df, 0x00000000 }, +{ 0x00000001, 0x208822e8, 0x0000026f, 0x00000000 }, +{ 0x00000001, 0x208c22e8, 0x0000027f, 0x00000000 }, +{ 0x00000001, 0x20a822e8, 0x000002ef, 0x00000000 }, +{ 0x00000001, 0x20ac22e8, 0x000002ff, 0x00000000 }, +{ 0x00000001, 0x209022e8, 0x0000028f, 0x00000000 }, +{ 0x00000001, 0x209422e8, 0x0000029f, 0x00000000 }, +{ 0x00000001, 0x20b022e8, 0x0000030f, 0x00000000 }, +{ 0x00000001, 0x20b422e8, 0x0000031f, 0x00000000 }, +{ 0x00000001, 0x209822e8, 0x000002af, 0x00000000 }, +{ 0x00000001, 0x209c22e8, 0x000002bf, 0x00000000 }, +{ 0x00000001, 0x20b822e8, 0x0000032f, 0x00000000 }, +{ 0x00000001, 0x20bc22e8, 0x0000033f, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00000001, 0x20403a28, 0x00000100, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000104, 0x00000000 }, +{ 0x00000001, 0x41a72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x41af2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000108, 0x00000000 }, +{ 0x00600001, 0x21803ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000344, 0x0a0a8000 }, 
+{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x0000010c, 0x00000000 }, +{ 0x00000001, 0x41b72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000110, 0x00000000 }, +{ 0x00000001, 0x41bf2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000114, 0x00000000 }, +{ 0x00000001, 0x41c72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000118, 0x00000000 }, +{ 0x00000001, 0x41cf2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x0000011c, 0x00000000 }, +{ 0x00000001, 0x41d72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000120, 0x00000000 }, +{ 0x00000001, 0x41df2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000124, 0x00000000 }, +{ 0x00000001, 0x41e72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000128, 0x00000000 }, +{ 0x00000001, 0x41ef2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x0000012c, 0x00000000 }, +{ 0x00000001, 0x41f72288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000130, 0x00000000 }, +{ 0x00000001, 0x41ff2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000134, 0x00000000 }, +{ 0x00000001, 0x42072288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x00000138, 0x00000000 }, +{ 0x00000001, 0x420f2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x20403a28, 0x0000013c, 
0x00000000 }, +{ 0x00000001, 0x42172288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x21880608, 0x00000000, 0x000f0007 }, +{ 0x00000001, 0x60240a88, 0x00000040, 0x00000000 }, +{ 0x00000001, 0x21803ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x421f2288, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x21843ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000180, 0x00000200 }, +{ 0x00000040, 0x202c0208, 0x1600002c, 0x00080008 }, +{ 0x00000040, 0x20240208, 0x1e000020, 0xfffcfffc }, +{ 0x05000010, 0x20000200, 0x0200002c, 0x00000024 }, +{ 0x00010020, 0x34000004, 0x0e001400, 0xffff9830 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000340, 0x02290000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 }, +{ 0x00000001, 0x20401608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00800001, 0x20603ae8, 0x008d0040, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000344, 0x060a8000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 }, +{ 0x00000001, 0x20401608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x20240208, 0x1e000020, 0xfffcfffc }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000340, 0x02290000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 }, +{ 0x00000001, 0x20403ae8, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00800001, 0x20603ae8, 0x008d0040, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000344, 0x060a8000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x000f0003 }, +{ 0x00000001, 0x20403ae8, 0x00000024, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 
0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x2e003ae8, 0x008d0000, 0x00000000 }, +{ 0x07000031, 0x20003a00, 0x06000e00, 0x82000010 }, diff --git a/src/shaders/post_processing/gen8/sharpening_unmask.g8b b/src/shaders/post_processing/gen8/sharpening_unmask.g8b new file mode 100644 index 0000000..f27a2d5 --- /dev/null +++ b/src/shaders/post_processing/gen8/sharpening_unmask.g8b @@ -0,0 +1,159 @@ +{ 0x00000001, 0x21281608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x202c1608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x21481608, 0x00000000, 0x00050005 }, +{ 0x00000001, 0x21681608, 0x00000000, 0x00040004 }, +{ 0x00000001, 0x21881608, 0x00000000, 0x00020002 }, +{ 0x00000001, 0x21081608, 0x00000000, 0x00010001 }, +{ 0x06000010, 0x20000202, 0x16000020, 0x00000000 }, +{ 0x00010020, 0x34000006, 0x0e001400, 0x00000530 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000128, 0x02190000 }, +{ 0x06000010, 0x20000201, 0x16000030, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x00030003 }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20603a08, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x22000200, 0x06000188, 0x02190000 }, +{ 0x00400001, 0x21a03ae8, 0x00690060, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00400001, 0x21c03ae8, 0x00690040, 0x00000000 }, +{ 0x00010020, 0x34000005, 0x0e001400, 0x00000150 }, +{ 0x00000001, 0x21821e68, 0x00000000, 0x00800080 }, +{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x006901c0, 0x00000000 }, +{ 0x0080802c, 0x21600008, 0x0e490000, 0x00000460 }, +{ 0x00000001, 0x210c1e68, 0x00000000, 0x00800080 }, +{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x00690150, 0x00000000 }, +{ 0x0080802c, 0x21200008, 0x0e490000, 0x000004e0 }, +{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 }, +{ 0x00000001, 0x21801e68, 0x00000000, 0x00800080 }, +{ 
0x00400001, 0x21503ae8, 0x00690150, 0x00000000 }, +{ 0x0080802c, 0x21000008, 0x0e490000, 0x00000550 }, +{ 0x00000001, 0x21801e68, 0x00000000, 0x00800080 }, +{ 0x0080802c, 0x21000008, 0x0e490000, 0x00000530 }, +{ 0x00000001, 0x41800268, 0x00000030, 0x00000000 }, +{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 }, +{ 0x00000001, 0x20241a68, 0x00000180, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x006901a0, 0x00000000 }, +{ 0x0080802c, 0x21000008, 0x0e490000, 0x000004e0 }, +{ 0x00400001, 0x21903ae8, 0x00690150, 0x00000000 }, +{ 0x00000020, 0x34000004, 0x0e001400, 0x00000010 }, +{ 0x00400001, 0x21903ae8, 0x006901a0, 0x00000000 }, +{ 0x01000010, 0x20000200, 0x16000034, 0x00000000 }, +{ 0x00010020, 0x34000004, 0x0e001400, 0x00000160 }, +{ 0x00000001, 0x21121e68, 0x00000000, 0x00800080 }, +{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x006901c0, 0x00000000 }, +{ 0x0080802c, 0x21800008, 0x0e490000, 0x00000560 }, +{ 0x00000001, 0x210c1e68, 0x00000000, 0x00800080 }, +{ 0x00400001, 0x21703ae8, 0x006901a0, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x00690150, 0x00000000 }, +{ 0x0080802c, 0x21200008, 0x0e490000, 0x00000360 }, +{ 0x00600040, 0x41502288, 0x1eae4150, 0x00ff00ff }, +{ 0x00000001, 0x21101e68, 0x00000000, 0x00800080 }, +{ 0x00600040, 0x41512288, 0x1eae4151, 0x00ff00ff }, +{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 }, +{ 0x0080802c, 0x21400008, 0x0e490000, 0x00000590 }, +{ 0x00000001, 0x21101e68, 0x00000000, 0x00800080 }, +{ 0x0080802c, 0x21400008, 0x0e490000, 0x00000570 }, +{ 0x00000001, 0x41100268, 0x00000034, 0x00000000 }, +{ 0x00400001, 0x21703ae8, 0x00690150, 0x00000000 }, +{ 0x00000001, 0x20241a68, 0x00000110, 0x00000000 }, +{ 0x00400001, 0x21503ae8, 0x00690190, 0x00000000 }, +{ 0x0080802c, 0x21400008, 0x0e490000, 0x00000520 }, +{ 0x00400001, 0x21e03ae8, 0x00690150, 0x00000000 }, +{ 0x00000020, 0x34000004, 0x0e001400, 0x00000010 }, +{ 0x00400001, 0x21e03ae8, 0x00690190, 0x00000000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 
0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000168, 0x040a8000 }, +{ 0x00400001, 0x20603ae8, 0x006901e0, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x00030003 }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000028, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 }, +{ 0x0000000c, 0x20240208, 0x16000028, 0x00010001 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000108, 0x02190000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x00010003 }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000024, 0x00000000 }, +{ 0x0c600031, 0x20403a08, 0x00000040, 0x00000200 }, +{ 0x00200001, 0x20603ae8, 0x00450040, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000148, 0x040a8000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x00010003 }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000024, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x202c0208, 0x1600002c, 0x00040004 }, +{ 0x05000010, 0x20000203, 0x0200002c, 0x00000020 }, +{ 0x00010020, 0x34000007, 0x0e001400, 0xfffffad0 }, +{ 0x00600001, 0x2e003ae8, 0x008d0000, 0x00000000 }, +{ 0x07000031, 0x20003a00, 0x06000e00, 0x82000010 }, +{ 0x00000040, 0x20241a28, 0x1e004182, 0x00800080 }, +{ 0x00800041, 0x20802228, 0x1ab10170, 0x00000182 }, +{ 0x05600010, 0x20002260, 0x22ae0170, 0x00ae0150 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0158 }, +{ 0x00600041, 0x20400a28, 0x22000024, 0x008d0150 }, +{ 0x00800040, 0x20400a28, 0x0a8d0080, 0x008d0040 }, +{ 0x0080000c, 0x40400a68, 0x1e8d0040, 0x00070007 }, +{ 0x05601010, 0x20002260, 0x22ae0171, 0x00ae0151 }, +{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 }, +{ 0x00610001, 0x41501a88, 0x00ae0040, 0x00000000 }, +{ 0x00611001, 0x41511a88, 0x00ae0042, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450160, 0x00000000 }, +{ 0x00800040, 
0x20402268, 0x22b10150, 0x00b14170 }, +{ 0x00000040, 0x20241a28, 0x1e00410c, 0x00800080 }, +{ 0x05800010, 0x20001a62, 0x1eb10040, 0x00000000 }, +{ 0x00810001, 0x20401a6a, 0x00b14040, 0x00000000 }, +{ 0x00600041, 0x20800a28, 0x22000024, 0x008d0158 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0150 }, +{ 0x00800041, 0x20a01a28, 0x1a8d0040, 0x0000010c }, +{ 0x00800040, 0x20400a28, 0x0a8d00a0, 0x008d0060 }, +{ 0x0080000c, 0x60400a88, 0x1e8d0040, 0x00070007 }, +{ 0x00800001, 0x21502288, 0x00cf0040, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450120, 0x00000000 }, +{ 0x00800040, 0x20802228, 0x1eb14170, 0x00ff00ff }, +{ 0x00800040, 0x20402228, 0x1eb14150, 0x00ff00ff }, +{ 0x00000040, 0x20241a28, 0x1e004180, 0x00800080 }, +{ 0x00600041, 0x20c00a28, 0x0a8d0040, 0x008d0080 }, +{ 0x00600041, 0x20e00a28, 0x0a8d0060, 0x008d00a0 }, +{ 0x00000001, 0x20401e28, 0x00000000, 0x00ff00ff }, +{ 0x0c600038, 0x20800a28, 0x0a8d00e0, 0x00000040 }, +{ 0x0c600038, 0x20600a28, 0x0a8d00c0, 0x00000040 }, +{ 0x00800040, 0x40400a68, 0x1e8d4060, 0x00ff00ff }, +{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 }, +{ 0x00600041, 0x20800a28, 0x22000024, 0x008d0158 }, +{ 0x00800041, 0x20a01a28, 0x1a8d0040, 0x00000180 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0150 }, +{ 0x00800040, 0x20400a28, 0x0a8d00a0, 0x008d0060 }, +{ 0x0080000c, 0x60400a88, 0x1e8d0040, 0x00070007 }, +{ 0x00800001, 0x21502288, 0x00cf0040, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450100, 0x00000000 }, +{ 0x00000040, 0x20241a28, 0x1e004112, 0x00800080 }, +{ 0x00800041, 0x20802228, 0x1ab10170, 0x00000112 }, +{ 0x03600010, 0x20002261, 0x22ae0170, 0x00ae0150 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0158 }, +{ 0x00600041, 0x20400a28, 0x22000024, 0x008d0150 }, +{ 0x00800040, 0x20400a28, 0x0a8d0080, 0x008d0040 }, +{ 0x0080000c, 0x40400a68, 0x1e8d0040, 0x00070007 }, +{ 0x03601010, 0x20002261, 0x22ae0171, 0x00ae0151 }, +{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 }, +{ 0x00610001, 0x41501a89, 0x00ae0040, 0x00000000 }, 
+{ 0x00611001, 0x41511a89, 0x00ae0042, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450180, 0x00000000 }, +{ 0x00000001, 0x20801e28, 0x00000000, 0x00ff00ff }, +{ 0x00800041, 0x20402228, 0x22b10170, 0x00b10150 }, +{ 0x00000040, 0x20241a28, 0x1e004110, 0x00800080 }, +{ 0x0c600038, 0x20c00a28, 0x0a8d0060, 0x00000080 }, +{ 0x0c600038, 0x20a00a28, 0x0a8d0040, 0x00000080 }, +{ 0x00800001, 0x40400a68, 0x008d00a0, 0x00000000 }, +{ 0x00800001, 0x20401a68, 0x00ae0040, 0x00000000 }, +{ 0x00600041, 0x20800a28, 0x22000024, 0x008d0158 }, +{ 0x00800041, 0x20a01a28, 0x1a8d0040, 0x00000110 }, +{ 0x00600041, 0x20600a28, 0x22000024, 0x008d0150 }, +{ 0x00800040, 0x20400a28, 0x0a8d00a0, 0x008d0060 }, +{ 0x0080000c, 0x60400a88, 0x1e8d0040, 0x00070007 }, +{ 0x00800001, 0x21502288, 0x00cf0040, 0x00000000 }, +{ 0x0080002d, 0x20000220, 0x00450140, 0x00000000 }, diff --git a/src/shaders/post_processing/gen8/sharpening_v_blur.g8b b/src/shaders/post_processing/gen8/sharpening_v_blur.g8b new file mode 100644 index 0000000..a57f43d --- /dev/null +++ b/src/shaders/post_processing/gen8/sharpening_v_blur.g8b @@ -0,0 +1,296 @@ +{ 0x00000001, 0x23601608, 0x00000000, 0x00000000 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000001, 0x23641608, 0x00000000, 0x00020002 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0007000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000360, 0x02490000 }, +{ 0x00000001, 0x20441608, 0x00000000, 0x00000000 }, +{ 0x00000001, 0x20281608, 0x00000000, 0x00040004 }, +{ 0x0c600031, 0x22e03a08, 0x00000040, 0x00000200 }, +{ 0x00000040, 0x20200208, 0x1e000024, 0xfffcfffc }, +{ 0x06000010, 0x20000201, 0x16000020, 0x00040004 }, +{ 0x00010020, 0x34000005, 0x0e001400, 0x00000ff0 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000360, 0x02490000 }, +{ 0x00800001, 0x22a03ae8, 0x008d0320, 0x00000000 }, +{ 0x00800001, 0x22603ae8, 0x008d02e0, 0x00000000 }, +{ 0x00000001, 0x21403ee8, 0x00000000, 
0x3e525448 }, +{ 0x00000001, 0x21603ee8, 0x00000000, 0x3875735f }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0007000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000040, 0x20440208, 0x16000028, 0x00040004 }, +{ 0x00800001, 0x208022e8, 0x00b10270, 0x00000000 }, +{ 0x0c600031, 0x22e03a08, 0x00000040, 0x00000200 }, +{ 0x00000001, 0x20203ee8, 0x00000000, 0x332bcc77 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x208022e8, 0x00b10280, 0x00000000 }, +{ 0x00800001, 0x204022e8, 0x00b10260, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x00000001, 0x20303ee8, 0x00000000, 0x3c1d98ad }, +{ 0x00800001, 0x204022e8, 0x00b10270, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10290, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00000001, 0x21503ee8, 0x00000000, 0x3f11e168 }, +{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b10280, 0x00000000 }, +{ 0x00800001, 0x21803a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 
0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x61800a88, 0x008d0180, 0x00000000 }, +{ 0x00800001, 0x21a02288, 0x00cf0180, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x00800001, 0x208022e8, 0x00b10290, 0x00000000 }, +{ 0x00800001, 0x204022e8, 0x00b10280, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b10290, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, 
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x00800001, 0x21b02288, 0x00cf0220, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102a0, 0x00000000 }, +{ 0x00800001, 0x204022e8, 0x00b10290, 0x00000000 }, +{ 0x00800001, 0x21c02288, 0x00cf0220, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x204022e8, 0x00b102a0, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 
0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b102b0, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x21d02288, 0x00cf0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x00800001, 0x204022e8, 0x00b102b0, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 
0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b102c0, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x204022e8, 0x00b102c0, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10330, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x21e02288, 0x00cf0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102d0, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, 
+{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072015 }, +{ 0x00800001, 0x21f02288, 0x00cf0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x00800001, 0x204022e8, 0x00b102d0, 0x00000000 }, +{ 0x00600001, 0x21803ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000364, 0x0a0a8000 }, +{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10330, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10340, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00800001, 0x208022e8, 0x00b102e0, 0x00000000 }, +{ 0x00800001, 0x22203a28, 0x008d0100, 0x00000000 }, +{ 0x00800041, 0x20c03ae8, 0x3e8d0080, 0x3875735f }, +{ 0x00800001, 0x62200a88, 0x008d0220, 0x00000000 }, +{ 0x00800001, 0x208022e8, 0x00b102f0, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392071c8, 0x00c72002 }, +{ 0x0060015b, 0x081e0000, 0x392061c8, 0x00872002 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10300, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b10310, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472015 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 
0x01072015 }, +{ 0x00800001, 0x208022e8, 0x00b10320, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472014 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072014 }, +{ 0x00800001, 0x208022e8, 0x00b10330, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472003 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072003 }, +{ 0x00800001, 0x208022e8, 0x00b10340, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472016 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072016 }, +{ 0x00800001, 0x208022e8, 0x00b10350, 0x00000000 }, +{ 0x0060015b, 0x091e0000, 0x392091c8, 0x01472002 }, +{ 0x0060015b, 0x081e0000, 0x392081c8, 0x01072002 }, +{ 0x00000001, 0x21880608, 0x00000000, 0x0007000f }, +{ 0x00800001, 0x20403a28, 0x008d0100, 0x00000000 }, +{ 0x00000001, 0x21803ae8, 0x0000002c, 0x00000000 }, +{ 0x00800001, 0x22002288, 0x00cf0220, 0x00000000 }, +{ 0x00800001, 0x60400a88, 0x008d0040, 0x00000000 }, +{ 0x00800001, 0x22102288, 0x00cf0040, 0x00000000 }, +{ 0x00000001, 0x21843ae8, 0x00000028, 0x00000000 }, +{ 0x00000040, 0x20280208, 0x16000028, 0x00080008 }, +{ 0x0c600031, 0x20003a00, 0x00000180, 0x00000200 }, +{ 0x00000040, 0x20200208, 0x1e000024, 0xfffcfffc }, +{ 0x05000010, 0x20000200, 0x02000028, 0x00000020 }, +{ 0x00010020, 0x34000004, 0x0e001400, 0xfffff010 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000360, 0x02290000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20441608, 0x00000000, 0x00000000 }, +{ 0x0c600031, 0x22603a08, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000364, 0x060a8000 }, +{ 0x00800001, 0x20603ae8, 0x008d0260, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20441608, 0x00000000, 0x00000000 }, +{ 0x00000040, 0x20200208, 0x1e000024, 0xfffcfffc }, +{ 0x0c600031, 
0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000360, 0x02290000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000020, 0x00000000 }, +{ 0x0c600031, 0x22603a08, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x20403ae8, 0x008d0000, 0x00000000 }, +{ 0x00000040, 0x22000200, 0x06000364, 0x060a8000 }, +{ 0x00800001, 0x20603ae8, 0x008d0260, 0x00000000 }, +{ 0x00000001, 0x20480608, 0x00000000, 0x0003000f }, +{ 0x00000001, 0x20403ae8, 0x0000002c, 0x00000000 }, +{ 0x00000001, 0x20443ae8, 0x00000020, 0x00000000 }, +{ 0x0c600031, 0x20003a00, 0x00000040, 0x00000200 }, +{ 0x00600001, 0x2e003ae8, 0x008d0000, 0x00000000 }, +{ 0x07000031, 0x20003a00, 0x06000e00, 0x82000010 }, diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am index dac58c7..e7d5e76 100644 --- a/src/shaders/render/Makefile.am +++ b/src/shaders/render/Makefile.am @@ -1,7 +1,11 @@ INTEL_G4I = \ exa_wm.g4i \ - exa_wm_affine.g4i + exa_wm_affine.g4i \ + exa_wm_yuv_color_balance.gxa \ + exa_yuv_rgb.gxa \ + exa_yuv_gen4.g4i \ + exa_yuv_gen6.g4i INTEL_G4A = \ exa_sf.g4a \ @@ -9,6 +13,7 @@ INTEL_G4A = \ exa_wm_src_affine.g4a \ exa_wm_src_sample_argb.g4a \ exa_wm_src_sample_planar.g4a \ + exa_wm_yuv_color_balance.g4a \ exa_wm_yuv_rgb.g4a \ exa_wm_write.g4a @@ -20,6 +25,7 @@ INTEL_G4B = \ exa_wm_src_affine.g4b \ exa_wm_src_sample_argb.g4b \ exa_wm_src_sample_planar.g4b \ + exa_wm_yuv_color_balance.g4b \ exa_wm_yuv_rgb.g4b \ exa_wm_write.g4b @@ -29,14 +35,18 @@ INTEL_G4B_GEN5 = \ exa_wm_src_affine.g4b.gen5 \ exa_wm_src_sample_argb.g4b.gen5 \ exa_wm_src_sample_planar.g4b.gen5 \ + exa_wm_yuv_color_balance.g4b.gen5 \ exa_wm_yuv_rgb.g4b.gen5 \ exa_wm_write.g4b.gen5 +INTEL_G6I = $(INTEL_G4I) + INTEL_G6A = \ exa_wm_src_affine.g6a \ exa_wm_src_sample_argb.g6a \ exa_wm_src_sample_planar.g6a \ exa_wm_write.g6a \ + exa_wm_yuv_color_balance.g6a \ 
exa_wm_yuv_rgb.g6a INTEL_G6S = $(INTEL_G6A:%.g6a=%.g6s) @@ -46,13 +56,17 @@ INTEL_G6B = \ exa_wm_src_sample_argb.g6b \ exa_wm_src_sample_planar.g6b \ exa_wm_write.g6b \ + exa_wm_yuv_color_balance.g6b \ exa_wm_yuv_rgb.g6b +INTEL_G7I = $(INTEL_G4I) + INTEL_G7A = \ exa_wm_src_affine.g7a \ exa_wm_src_sample_argb.g7a \ exa_wm_src_sample_planar.g7a \ exa_wm_write.g7a \ + exa_wm_yuv_color_balance.g7a \ exa_wm_yuv_rgb.g7a INTEL_G7S = $(INTEL_G7A:%.g7a=%.g7s) @@ -62,13 +76,33 @@ INTEL_G7B = \ exa_wm_src_sample_argb.g7b \ exa_wm_src_sample_planar.g7b \ exa_wm_write.g7b \ + exa_wm_yuv_color_balance.g7b \ exa_wm_yuv_rgb.g7b # XXX: only regenerate binary for EU code containing JMPI instructions INTEL_G7B_HASWELL = \ exa_wm_src_sample_planar.g7b.haswell \ + exa_wm_yuv_color_balance.g7b.haswell \ $(NULL) +INTEL_G8A = \ + exa_wm_src_affine.g8a \ + exa_wm_src_sample_planar.g8a \ + exa_wm_src_sample_argb.g8a \ + exa_wm_yuv_color_balance.g8a \ + exa_wm_write.g8a \ + exa_wm_yuv_rgb.g8a + +INTEL_G8S = $(INTEL_G8A:%.g8a=%.g8s) + +INTEL_G8B = \ + exa_wm_src_affine.g8b \ + exa_wm_src_sample_planar.g8b \ + exa_wm_src_sample_argb.g8b \ + exa_wm_yuv_color_balance.g8b \ + exa_wm_yuv_rgb.g8b \ + exa_wm_write.g8b + TARGETS = if HAVE_GEN4ASM TARGETS += $(INTEL_G4B) @@ -76,11 +110,12 @@ TARGETS += $(INTEL_G4B_GEN5) TARGETS += $(INTEL_G6B) TARGETS += $(INTEL_G7B) TARGETS += $(INTEL_G7B_HASWELL) +TARGETS += $(INTEL_G8B) endif all-local: $(TARGETS) -SUFFIXES = .g4a .g4s .g4b .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell +SUFFIXES = .g4a .g4s .g4b .g4b.gen5 .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell .g8a .g8b .g8s if HAVE_GEN4ASM $(INTEL_G4S): $(INTEL_G4A) $(INTEL_G4I) @@ -104,12 +139,21 @@ $(INTEL_G7S): $(INTEL_G7A) $(INTEL_G7I) $(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $< .g7s.g7b.haswell: $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $< + + +$(INTEL_G8S): $(INTEL_G8A) $(INTEL_G8I) +.g8a.g8s: + $(AM_V_GEN)m4 $< > $@ +.g8s.g8b: + $(AM_V_GEN)$(GEN4ASM) -g 8 -o $@ $< + endif CLEANFILES = \ $(INTEL_G4S) \ $(INTEL_G6S) \ 
$(INTEL_G7S) \ + $(INTEL_G8S) \ $(NULL) EXTRA_DIST = \ @@ -122,6 +166,8 @@ EXTRA_DIST = \ $(INTEL_G7A) \ $(INTEL_G7B) \ $(INTEL_G7B_HASWELL) \ + $(INTEL_G8A) \ + $(INTEL_G8B) \ $(NULL) # Extra clean files so that maintainer-clean removes *everything* diff --git a/src/shaders/render/exa_wm.g4i b/src/shaders/render/exa_wm.g4i index 8163de5..e186d3a 100644 --- a/src/shaders/render/exa_wm.g4i +++ b/src/shaders/render/exa_wm.g4i @@ -1,5 +1,5 @@ /* - * Copyright © 2006 Intel Corporation + * Copyright © 2006-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -39,39 +39,43 @@ define(`screen_y0', `g1.4<0,1,0>F') define(`interleaved_uv', `g2.0<0,1,0>UW') /* Source transformation parameters */ -define(`src_du_dx', `g3.0<0,1,0>F') -define(`src_du_dy', `g3.4<0,1,0>F') -define(`src_uo', `g3.12<0,1,0>F') -define(`src_dv_dx', `g3.16<0,1,0>F') -define(`src_dv_dy', `g3.20<0,1,0>F') -define(`src_vo', `g3.28<0,1,0>F') -define(`src_dw_dx', `g4.0<0,1,0>F') -define(`src_dw_dy', `g4.4<0,1,0>F') -define(`src_wo', `g4.12<0,1,0>F') - -define(`mask_du_dx', `g5.0<0,1,0>F') -define(`mask_du_dy', `g5.4<0,1,0>F') -define(`mask_uo', `g5.12<0,1,0>F') -define(`mask_dv_dx', `g5.16<0,1,0>F') -define(`mask_dv_dy', `g5.20<0,1,0>F') -define(`mask_vo', `g5.28<0,1,0>F') -define(`mask_dw_dx', `g6.0<0,1,0>F') -define(`mask_dw_dy', `g6.4<0,1,0>F') -define(`mask_wo', `g6.12<0,1,0>F') +define(`src_du_dx', `g6.0<0,1,0>F') +define(`src_du_dy', `g6.4<0,1,0>F') +define(`src_uo', `g6.12<0,1,0>F') +define(`src_dv_dx', `g6.16<0,1,0>F') +define(`src_dv_dy', `g6.20<0,1,0>F') +define(`src_vo', `g6.28<0,1,0>F') +define(`src_dw_dx', `g7.0<0,1,0>F') +define(`src_dw_dy', `g7.4<0,1,0>F') +define(`src_wo', `g7.12<0,1,0>F') + +define(`mask_du_dx', `g8.0<0,1,0>F') +define(`mask_du_dy', `g8.4<0,1,0>F') +define(`mask_uo', `g8.12<0,1,0>F') +define(`mask_dv_dx', `g8.16<0,1,0>F') +define(`mask_dv_dy', 
`g8.20<0,1,0>F') +define(`mask_vo', `g8.28<0,1,0>F') +define(`mask_dw_dx', `g9.0<0,1,0>F') +define(`mask_dw_dy', `g9.4<0,1,0>F') +define(`mask_wo', `g9.12<0,1,0>F') + +/* Attribute for snb+ */ +define(`a0_a_x',`g10.0<0,1,0>F') +define(`a0_a_y',`g10.16<0,1,0>F') /* * Local variables. Pairs must be aligned on even reg boundary */ /* this holds the X dest coordinates */ -define(`dst_x', `g8') +define(`dst_x', `g42') define(`dst_x_0', `dst_x') -define(`dst_x_1', `g9') +define(`dst_x_1', `g43') /* this holds the Y dest coordinates */ -define(`dst_y', `g10') +define(`dst_y', `g44') define(`dst_y_0', `dst_y') -define(`dst_y_1', `g11') +define(`dst_y_1', `g45') /* When computing x * dn/dx, use this */ define(`temp_x', `g30') @@ -142,6 +146,25 @@ define(`mask_sample_a', `g28') define(`mask_sample_a_01', `g28') define(`mask_sample_a_23', `g29') +/* Color Balance to these registers; each channel is a register pair, _01 being the first register (same as the channel base) and _23 the second */ +define(`color_balance_base', `g32') + +define(`color_balance_r', `g32') +define(`color_balance_r_01', `g32') +define(`color_balance_r_23', `g33') + +define(`color_balance_g', `g34') +define(`color_balance_g_01', `g34') +define(`color_balance_g_23', `g35') + +define(`color_balance_b', `g36') +define(`color_balance_b_01', `g36') +define(`color_balance_b_23', `g37') + +define(`color_balance_a', `g38') +define(`color_balance_a_01', `g38') +define(`color_balance_a_23', `g39') + /* data port SIMD16 send registers */ define(`data_port_msg_0', `m0') diff --git a/src/shaders/render/exa_wm_src_affine.g4b b/src/shaders/render/exa_wm_src_affine.g4b index d30da87..7507b72 100644 --- a/src/shaders/render/exa_wm_src_affine.g4b +++ b/src/shaders/render/exa_wm_src_affine.g4b @@ -1,8 +1,8 @@ - { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, - { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000c0 }, + { 0x00802041, 0x238077bd, 0x008d0580, 0x000000c4 }, { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, - { 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c }, - {
0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, - { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x204077be, 0x008d03c0, 0x000000cc }, + { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000d0 }, + { 0x00802041, 0x238077bd, 0x008d0580, 0x000000d4 }, { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, - { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c }, + { 0x00802040, 0x208077be, 0x008d03c0, 0x000000dc }, diff --git a/src/shaders/render/exa_wm_src_affine.g4b.gen5 b/src/shaders/render/exa_wm_src_affine.g4b.gen5 index d30da87..7507b72 100644 --- a/src/shaders/render/exa_wm_src_affine.g4b.gen5 +++ b/src/shaders/render/exa_wm_src_affine.g4b.gen5 @@ -1,8 +1,8 @@ - { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, - { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000c0 }, + { 0x00802041, 0x238077bd, 0x008d0580, 0x000000c4 }, { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, - { 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c }, - { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, - { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x204077be, 0x008d03c0, 0x000000cc }, + { 0x00802041, 0x23c077bd, 0x008d0540, 0x000000d0 }, + { 0x00802041, 0x238077bd, 0x008d0580, 0x000000d4 }, { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, - { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c }, + { 0x00802040, 0x208077be, 0x008d03c0, 0x000000dc }, diff --git a/src/shaders/render/exa_wm_src_affine.g6a b/src/shaders/render/exa_wm_src_affine.g6a index 568aef3..04358cb 100644 --- a/src/shaders/render/exa_wm_src_affine.g6a +++ b/src/shaders/render/exa_wm_src_affine.g6a @@ -35,9 +35,6 @@ define(`vh', `m5') define(`bl', `g2.0<8,8,1>F') define(`bh', `g4.0<8,8,1>F') -define(`a0_a_x',`g7.0<0,1,0>F') -define(`a0_a_y',`g7.16<0,1,0>F') - /* U */ pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */ pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */ diff --git a/src/shaders/render/exa_wm_src_affine.g6b 
b/src/shaders/render/exa_wm_src_affine.g6b index 5d0ffcc..22c1d22 100644 --- a/src/shaders/render/exa_wm_src_affine.g6b +++ b/src/shaders/render/exa_wm_src_affine.g6b @@ -1,4 +1,4 @@ - { 0x0060005a, 0x204077be, 0x000000e0, 0x008d0040 }, - { 0x0060005a, 0x206077be, 0x000000e0, 0x008d0080 }, - { 0x0060005a, 0x208077be, 0x000000f0, 0x008d0040 }, - { 0x0060005a, 0x20a077be, 0x000000f0, 0x008d0080 }, + { 0x0060005a, 0x204077be, 0x00000140, 0x008d0040 }, + { 0x0060005a, 0x206077be, 0x00000140, 0x008d0080 }, + { 0x0060005a, 0x208077be, 0x00000150, 0x008d0040 }, + { 0x0060005a, 0x20a077be, 0x00000150, 0x008d0080 }, diff --git a/src/shaders/render/exa_wm_src_affine.g7a b/src/shaders/render/exa_wm_src_affine.g7a index a786bc0..88e5ed5 100644 --- a/src/shaders/render/exa_wm_src_affine.g7a +++ b/src/shaders/render/exa_wm_src_affine.g7a @@ -35,8 +35,6 @@ define(`vh', `g69') define(`bl', `g2.0<8,8,1>F') define(`bh', `g4.0<8,8,1>F') -define(`a0_a_x',`g7.0<0,1,0>F') -define(`a0_a_y',`g7.16<0,1,0>F') /* U */ pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */ diff --git a/src/shaders/render/exa_wm_src_affine.g7b b/src/shaders/render/exa_wm_src_affine.g7b index 5dbbf1b..a15b7b6 100644 --- a/src/shaders/render/exa_wm_src_affine.g7b +++ b/src/shaders/render/exa_wm_src_affine.g7b @@ -1,4 +1,4 @@ - { 0x0060005a, 0x284077bd, 0x000000e0, 0x008d0040 }, - { 0x0060005a, 0x286077bd, 0x000000e0, 0x008d0080 }, - { 0x0060005a, 0x288077bd, 0x000000f0, 0x008d0040 }, - { 0x0060005a, 0x28a077bd, 0x000000f0, 0x008d0080 }, + { 0x0060005a, 0x284077bd, 0x00000140, 0x008d0040 }, + { 0x0060005a, 0x286077bd, 0x00000140, 0x008d0080 }, + { 0x0060005a, 0x288077bd, 0x00000150, 0x008d0040 }, + { 0x0060005a, 0x28a077bd, 0x00000150, 0x008d0080 }, diff --git a/src/shaders/render/exa_wm_src_affine.g8a b/src/shaders/render/exa_wm_src_affine.g8a new file mode 100644 index 0000000..7927c3b --- /dev/null +++ b/src/shaders/render/exa_wm_src_affine.g8a @@ -0,0 +1,45 @@ +/* + * Copyright © 2013 Intel Corporation + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +/* + * Fragment to compute src u/v values + */ +include(`exa_wm.g4i') + +define(`ul', `g66') +define(`uh', `g67') +define(`vl', `g68') +define(`vh', `g69') + +define(`bl', `g2.0<8,8,1>F') +define(`bh', `g4.0<8,8,1>F') + + +/* U */ +pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */ +pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */ + +/* V */ +pln (8) vl<1>F a0_a_y bl { align1 }; /* pixel 0-7 */ +pln (8) vh<1>F a0_a_y bh { align1 }; /* pixel 8-15 */ diff --git a/src/shaders/render/exa_wm_src_affine.g8b b/src/shaders/render/exa_wm_src_affine.g8b new file mode 100644 index 0000000..f5f9eca --- /dev/null +++ b/src/shaders/render/exa_wm_src_affine.g8b @@ -0,0 +1,4 @@ + { 0x0060005a, 0x28403ae8, 0x3a000140, 0x008d0040 }, + { 0x0060005a, 0x28603ae8, 0x3a000140, 0x008d0080 }, + { 0x0060005a, 0x28803ae8, 0x3a000150, 0x008d0040 }, + { 0x0060005a, 0x28a03ae8, 0x3a000150, 0x008d0080 }, diff --git a/src/shaders/render/exa_wm_src_sample_argb.g4a b/src/shaders/render/exa_wm_src_sample_argb.g4a index 8cc693e..c657655 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g4a +++ b/src/shaders/render/exa_wm_src_sample_argb.g4a @@ -48,5 +48,5 @@ send (16) src_msg_ind /* msg reg index */ /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ -mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 }; -mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 }; +mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 }; +mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 }; diff --git a/src/shaders/render/exa_wm_src_sample_argb.g4b b/src/shaders/render/exa_wm_src_sample_argb.g4b index 963c121..42e4a68 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g4b +++ b/src/shaders/render/exa_wm_src_sample_argb.g4b @@ -1,4 +1,4 @@ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, { 0x01800031, 0x21c01d29, 0x008d0000, 
0x02580001 }, - { 0x00600041, 0x228077bd, 0x00200280, 0x00000040 }, - { 0x00600041, 0x22a077bd, 0x002002a0, 0x00000040 }, + { 0x00600041, 0x228077bd, 0x008d0280, 0x00000040 }, + { 0x00600041, 0x22a077bd, 0x008d02a0, 0x00000040 }, diff --git a/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 b/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 index 45b3641..2012f89 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 +++ b/src/shaders/render/exa_wm_src_sample_argb.g4b.gen5 @@ -1,4 +1,4 @@ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, { 0x01800031, 0x21c01d29, 0x208d0000, 0x0a8a0001 }, - { 0x00600041, 0x228077bd, 0x00200280, 0x00000040 }, - { 0x00600041, 0x22a077bd, 0x002002a0, 0x00000040 }, + { 0x00600041, 0x228077bd, 0x008d0280, 0x00000040 }, + { 0x00600041, 0x22a077bd, 0x008d02a0, 0x00000040 }, diff --git a/src/shaders/render/exa_wm_src_sample_argb.g6a b/src/shaders/render/exa_wm_src_sample_argb.g6a index 48e79f7..c30b209 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g6a +++ b/src/shaders/render/exa_wm_src_sample_argb.g6a @@ -50,6 +50,6 @@ send (16) src_msg_ind /* msg reg index */ /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ -mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 }; +mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 }; mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 }; diff --git a/src/shaders/render/exa_wm_src_sample_argb.g6b b/src/shaders/render/exa_wm_src_sample_argb.g6b index 8964e45..53c6248 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g6b +++ b/src/shaders/render/exa_wm_src_sample_argb.g6b @@ -1,5 +1,5 @@ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 }, { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 }, - { 0x00600041, 0x228077bd, 0x00200280, 0x000000c0 }, + { 0x00600041, 0x228077bd, 0x008d0280, 
0x000000c0 }, { 0x00600041, 0x22a077bd, 0x002002a0, 0x000000c0 }, diff --git a/src/shaders/render/exa_wm_src_sample_argb.g7a b/src/shaders/render/exa_wm_src_sample_argb.g7a index 620e0e7..0165f7b 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g7a +++ b/src/shaders/render/exa_wm_src_sample_argb.g7a @@ -54,6 +54,6 @@ send (16) src_msg_ind_gen7 /* msg reg index */ /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ -mul (8) src_sample_a_01<1>f src_sample_a_01<1>f global_alpha { align1 mask_disable }; -mul (8) src_sample_a_23<1>f src_sample_a_23<1>f global_alpha { align1 mask_disable }; +mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 mask_disable }; +mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 mask_disable }; diff --git a/src/shaders/render/exa_wm_src_sample_argb.g7b b/src/shaders/render/exa_wm_src_sample_argb.g7b index 674fc74..0708bc0 100644 --- a/src/shaders/render/exa_wm_src_sample_argb.g7b +++ b/src/shaders/render/exa_wm_src_sample_argb.g7b @@ -1,5 +1,5 @@ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 }, { 0x02800031, 0x21c01ca9, 0x00000820, 0x0a8c0001 }, - { 0x00600241, 0x228077bd, 0x00200280, 0x000000c0 }, - { 0x00600241, 0x22a077bd, 0x002002a0, 0x000000c0 }, + { 0x00600241, 0x228077bd, 0x008d0280, 0x000000c0 }, + { 0x00600241, 0x22a077bd, 0x008d02a0, 0x000000c0 }, diff --git a/src/shaders/render/exa_wm_src_sample_argb.g8a b/src/shaders/render/exa_wm_src_sample_argb.g8a new file mode 100644 index 0000000..3a4e99f --- /dev/null +++ b/src/shaders/render/exa_wm_src_sample_argb.g8a @@ -0,0 +1,59 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the src surface */ + +include(`exa_wm.g4i') + +/* Ivybridge uses GRFs in SEND instruction */ +define(`src_msg_gen8', `g65') +define(`src_msg_ind_gen8',`65') + +/* subpicture global alpha */ +define(`global_alpha', `r6.0<0,1,0>f') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. 
*/ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load argb */ +mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* src_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_base<1>UW /* readback */ + null + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ + +mul (8) src_sample_a_01<1>f src_sample_a_01<8,8,1>f global_alpha { align1 mask_disable }; +mul (8) src_sample_a_23<1>f src_sample_a_23<8,8,1>f global_alpha { align1 mask_disable }; + diff --git a/src/shaders/render/exa_wm_src_sample_argb.g8b b/src/shaders/render/exa_wm_src_sample_argb.g8b new file mode 100644 index 0000000..2b04637 --- /dev/null +++ b/src/shaders/render/exa_wm_src_sample_argb.g8b @@ -0,0 +1,5 @@ + { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x21c00a48, 0x0e000820, 0x0a8c0001 }, + { 0x00600041, 0x22803aec, 0x3a8d0280, 0x000000c0 }, + { 0x00600041, 0x22a03aec, 0x3a8d02a0, 0x000000c0 }, diff --git a/src/shaders/render/exa_wm_src_sample_planar.g8a b/src/shaders/render/exa_wm_src_sample_planar.g8a new file mode 100644 index 0000000..7684491 --- /dev/null +++ b/src/shaders/render/exa_wm_src_sample_planar.g8a @@ -0,0 +1,106 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + * Zhao Yakui <yakui.zhao@intel.com> + */ + +/* Sample the src surface in planar format */ + +include(`exa_wm.g4i') + +/* Ivybridge uses GRFs in SEND instruction */ +define(`src_msg_gen8', `g65') +define(`src_msg_ind_gen8',`65') +/* UV flag */ +define(`uv_flag', `g6.0<0,1,0>UW') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. 
*/ +/* 'payload' loading, assuming tex coord start from g4 */ +cmp.e.f0.0 (1) null uv_flag 0x1UW {align1}; +(f0.0) jmpi INTERLEAVED_UV; + +cmp.e.f0.0 (1) null uv_flag 0x2UW {align1}; +(f0.0) jmpi CONSTANT_UV; + +/* load r */ +mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* emit sampler 'send' cmd */ + +/* sample U (Cb) */ +send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_g<1>UW /* readback */ + null + sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */ + +/* sample V (Cr) */ +mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_b<1>UW /* readback */ + null + sampler (5,4,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */ + +jmpi SAMPLE_Y; + +CONSTANT_UV: +mov (16) src_sample_g<1>f 0.5f { compr align1 mask_disable }; +mov (16) src_sample_b<1>f 0.5f { compr align1 mask_disable }; + +jmpi SAMPLE_Y; + +INTERLEAVED_UV: +mov (1) g0.8<1>UD 0x0000c000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* sample UV (CbCr) */ +send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_g<1>UW /* readback */ + null + sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 4 { align1 }; /* required message len 5, readback len 4 */ + + +SAMPLE_Y: +mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; +mov (8) src_msg_gen8<1>UD g0<8,8,1>UD { align1 mask_disable }; + +/* sample Y */ 
+send (16) src_msg_ind_gen8 /* msg reg index */ + src_sample_r<1>UW /* readback */ + null + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */ + diff --git a/src/shaders/render/exa_wm_src_sample_planar.g8b b/src/shaders/render/exa_wm_src_sample_planar.g8b new file mode 100644 index 0000000..f29cfe4 --- /dev/null +++ b/src/shaders/render/exa_wm_src_sample_planar.g8b @@ -0,0 +1,20 @@ + { 0x01000010, 0x200012e0, 0x160000c0, 0x00010001 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x01000010, 0x200012e0, 0x160000c0, 0x00020002 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x00000001, 0x2008060c, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22000a48, 0x0e000820, 0x0a2c0203 }, + { 0x00000001, 0x2008060c, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22400a48, 0x0e000820, 0x0a2c0405 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x00800001, 0x22003eec, 0x38000000, 0x3f000000 }, + { 0x00800001, 0x22403eec, 0x38000000, 0x3f000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00000001, 0x2008060c, 0x00000000, 0x0000c000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22000a48, 0x0e000820, 0x0a4c0203 }, + { 0x00000001, 0x2008060c, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x2820020c, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x21c00a48, 0x0e000820, 0x0a2c0001 }, diff --git a/src/shaders/render/exa_wm_write.g7a b/src/shaders/render/exa_wm_write.g7a index a2fb447..4b17929 100644 --- a/src/shaders/render/exa_wm_write.g7a +++ b/src/shaders/render/exa_wm_write.g7a @@ -45,17 +45,17 @@ define(`slot_b_01', `g71') define(`slot_a_00', `g72') define(`slot_a_01', `g73') -mov (8) slot_r_00<1>F src_sample_r_01<1>F { align1 
mask_disable }; -mov (8) slot_r_01<1>F src_sample_r_23<1>F { align1 mask_disable }; +mov (8) slot_r_00<1>F src_sample_r_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_r_01<1>F src_sample_r_23<8,8,1>F { align1 mask_disable }; -mov (8) slot_g_00<1>F src_sample_g_01<1>F { align1 mask_disable }; -mov (8) slot_g_01<1>F src_sample_g_23<1>F { align1 mask_disable }; +mov (8) slot_g_00<1>F src_sample_g_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_g_01<1>F src_sample_g_23<8,8,1>F { align1 mask_disable }; -mov (8) slot_b_00<1>F src_sample_b_01<1>F { align1 mask_disable }; -mov (8) slot_b_01<1>F src_sample_b_23<1>F { align1 mask_disable }; +mov (8) slot_b_00<1>F src_sample_b_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_b_01<1>F src_sample_b_23<8,8,1>F { align1 mask_disable }; -mov (8) slot_a_00<1>F src_sample_a_01<1>F { align1 mask_disable }; -mov (8) slot_a_01<1>F src_sample_a_23<1>F { align1 mask_disable }; +mov (8) slot_a_00<1>F src_sample_a_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_a_01<1>F src_sample_a_23<8,8,1>F { align1 mask_disable }; send (16) data_port_msg_2_ind diff --git a/src/shaders/render/exa_wm_write.g7b b/src/shaders/render/exa_wm_write.g7b index 05e1801..4f347cb 100644 --- a/src/shaders/render/exa_wm_write.g7b +++ b/src/shaders/render/exa_wm_write.g7b @@ -1,13 +1,13 @@ { 0x00600201, 0x28000021, 0x008d0000, 0x00000000 }, { 0x00600201, 0x28200021, 0x008d0020, 0x00000000 }, - { 0x00600201, 0x284003bd, 0x002001c0, 0x00000000 }, - { 0x00600201, 0x286003bd, 0x002001e0, 0x00000000 }, - { 0x00600201, 0x288003bd, 0x00200200, 0x00000000 }, - { 0x00600201, 0x28a003bd, 0x00200220, 0x00000000 }, - { 0x00600201, 0x28c003bd, 0x00200240, 0x00000000 }, - { 0x00600201, 0x28e003bd, 0x00200260, 0x00000000 }, - { 0x00600201, 0x290003bd, 0x00200280, 0x00000000 }, - { 0x00600201, 0x292003bd, 0x002002a0, 0x00000000 }, + { 0x00600201, 0x284003bd, 0x008d01c0, 0x00000000 }, + { 0x00600201, 0x286003bd, 0x008d01e0, 0x00000000 }, + { 0x00600201, 0x288003bd, 
0x008d0200, 0x00000000 }, + { 0x00600201, 0x28a003bd, 0x008d0220, 0x00000000 }, + { 0x00600201, 0x28c003bd, 0x008d0240, 0x00000000 }, + { 0x00600201, 0x28e003bd, 0x008d0260, 0x00000000 }, + { 0x00600201, 0x290003bd, 0x008d0280, 0x00000000 }, + { 0x00600201, 0x292003bd, 0x008d02a0, 0x00000000 }, { 0x05800031, 0x20001ca8, 0x00000800, 0x940b1000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/render/exa_wm_write.g8a b/src/shaders/render/exa_wm_write.g8a new file mode 100644 index 0000000..e6da9b6 --- /dev/null +++ b/src/shaders/render/exa_wm_write.g8a @@ -0,0 +1,83 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +include(`exa_wm.g4i') + +/* header */ +define(`data_port_msg_2_0', `g64') +define(`data_port_msg_2_1', `g65') +define(`data_port_msg_2_ind', `64') + +mov (8) data_port_msg_2_0<1>UD g0<8,8,1>UD {align1 mask_disable}; +mov (8) data_port_msg_2_1<1>UD g1<8,8,1>UD {align1 mask_disable}; + +/* + * Prepare data in g66-g67 for Red channel, g68-g69 for Green channel, + * g70-g71 for Blue and g72-g73 for Alpha channel + */ +define(`slot_r_00', `g66') +define(`slot_r_01', `g67') +define(`slot_g_00', `g68') +define(`slot_g_01', `g69') +define(`slot_b_00', `g70') +define(`slot_b_01', `g71') +define(`slot_a_00', `g72') +define(`slot_a_01', `g73') + +mov (8) slot_r_00<1>F src_sample_r_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_r_01<1>F src_sample_r_23<8,8,1>F { align1 mask_disable }; + +mov (8) slot_g_00<1>F src_sample_g_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_g_01<1>F src_sample_g_23<8,8,1>F { align1 mask_disable }; + +mov (8) slot_b_00<1>F src_sample_b_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_b_01<1>F src_sample_b_23<8,8,1>F { align1 mask_disable }; + +mov (8) slot_a_00<1>F src_sample_a_01<8,8,1>F { align1 mask_disable }; +mov (8) slot_a_01<1>F src_sample_a_23<8,8,1>F { align1 mask_disable }; + +send (16) + data_port_msg_2_ind + null<1>UW + null + write ( + 0, /* binding table index */ + 16, /* last render target(1) + slots 15:0(0) + msg type simd16 single source(000) */ + 12, /* render target write */ + 0, /* ignore for Ivybridge */ + 1 /* header present */ + ) + mlen 10 + rlen 0 + { align1 EOT }; + +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; + diff --git a/src/shaders/render/exa_wm_write.g8b b/src/shaders/render/exa_wm_write.g8b new file mode 100644 index 0000000..822578d --- /dev/null +++ b/src/shaders/render/exa_wm_write.g8b @@ -0,0 +1,19 @@ + { 0x00600001, 0x2800020c, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x2820020c, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28403aec, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28603aec, 
0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28803aec, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x28a03aec, 0x008d0220, 0x00000000 }, + { 0x00600001, 0x28c03aec, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x28e03aec, 0x008d0260, 0x00000000 }, + { 0x00600001, 0x29003aec, 0x008d0280, 0x00000000 }, + { 0x00600001, 0x29203aec, 0x008d02a0, 0x00000000 }, + { 0x05800031, 0x20000a40, 0x0e000800, 0x940b1000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/render/exa_wm_xy.g4b b/src/shaders/render/exa_wm_xy.g4b index 327fc29..2b3b235 100644 --- a/src/shaders/render/exa_wm_xy.g4b +++ b/src/shaders/render/exa_wm_xy.g4b @@ -1,4 +1,4 @@ { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 }, { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 }, - { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 }, - { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 }, + { 0x00802040, 0x2540753d, 0x008d03c0, 0x00004020 }, + { 0x00802040, 0x2580753d, 0x008d0380, 0x00004024 }, diff --git a/src/shaders/render/exa_wm_xy.g4b.gen5 b/src/shaders/render/exa_wm_xy.g4b.gen5 index 327fc29..2b3b235 100644 --- a/src/shaders/render/exa_wm_xy.g4b.gen5 +++ b/src/shaders/render/exa_wm_xy.g4b.gen5 @@ -1,4 +1,4 @@ { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 }, { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 }, - { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 }, - { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 }, + { 0x00802040, 0x2540753d, 0x008d03c0, 0x00004020 }, + { 0x00802040, 0x2580753d, 0x008d0380, 0x00004024 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g4a 
b/src/shaders/render/exa_wm_yuv_color_balance.g4a new file mode 100644 index 0000000..33ba67a --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g4a @@ -0,0 +1,38 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Haihao Xiang <haihao.xiang@intel.com> + * + */ + +include(`exa_wm.g4i') + +/* Color Balance parameters */ +define(`skip_color_balance', `g2.2<0,1,0>uw') +define(`contrast', `g2.16<0,1,0>f') +define(`brightness', `g2.20<0,1,0>f') +define(`cos_c_s', `g2.24<0,1,0>f') +define(`sin_c_s', `g2.28<0,1,0>f') +define(`sin_c_s_t', `g2.28') + +include(`exa_wm_yuv_color_balance.gxa') diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g4b b/src/shaders/render/exa_wm_yuv_color_balance.g4b new file mode 100644 index 0000000..cba9aca --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g4b @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x00000042, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000000d }, + { 0x00802040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00802041, 0x240077bd, 0x008d0400, 0x00000050 }, + { 0x00802040, 0x240077bd, 0x008d0400, 0x00000054 }, + { 0x00802040, 0x21c07fbd, 0x008d0400, 0x3d808081 }, + { 0x00802040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00802040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00802048, 0x240077bc, 0x008d0440, 0x0000005c }, + { 0x00802048, 0x220077bd, 0x008d0480, 0x00000058 }, + { 0x00000041, 0x205c7fbd, 0x0000005c, 0xbf800000 }, + { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00802048, 0x240077bc, 0x008d0480, 0x0000005c }, + { 0x00802048, 0x224077bd, 0x008d0440, 0x00000058 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5 b/src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5 new file mode 100644 index 0000000..5a24a0e --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g4b.gen5 @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x00000042, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000001a }, + { 0x00802040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00802041, 0x240077bd, 0x008d0400, 0x00000050 }, + { 0x00802040, 0x240077bd, 0x008d0400, 0x00000054 }, + { 0x00802040, 0x21c07fbd, 
0x008d0400, 0x3d808081 }, + { 0x00802040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00802040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00802048, 0x240077bc, 0x008d0440, 0x0000005c }, + { 0x00802048, 0x220077bd, 0x008d0480, 0x00000058 }, + { 0x00000041, 0x205c7fbd, 0x0000005c, 0xbf800000 }, + { 0x00802001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00802048, 0x240077bc, 0x008d0480, 0x0000005c }, + { 0x00802048, 0x224077bd, 0x008d0440, 0x00000058 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g6a b/src/shaders/render/exa_wm_yuv_color_balance.g6a new file mode 100644 index 0000000..6906357 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g6a @@ -0,0 +1,38 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Haihao Xiang <haihao.xiang@intel.com> + * + */ + +include(`exa_wm.g4i') + +/* Color Balance parameters */ +define(`skip_color_balance', `g6.2<0,1,0>uw') +define(`contrast', `g6.16<0,1,0>f') +define(`brightness', `g6.20<0,1,0>f') +define(`cos_c_s', `g6.24<0,1,0>f') +define(`sin_c_s', `g6.28<0,1,0>f') +define(`sin_c_s_t', `g6.28') + +include(`exa_wm_yuv_color_balance.gxa') diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g6b b/src/shaders/render/exa_wm_yuv_color_balance.g6b new file mode 100644 index 0000000..0a9e6b9 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g6b @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x000000c2, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000001a }, + { 0x00800040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00800041, 0x240077bd, 0x008d0400, 0x000000d0 }, + { 0x00800040, 0x240077bd, 0x008d0400, 0x000000d4 }, + { 0x00800040, 0x21c07fbd, 0x008d0400, 0x3d808081 }, + { 0x00800040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00800040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0440, 0x000000dc }, + { 0x00800048, 0x220077bd, 0x008d0480, 0x000000d8 }, + { 0x00000041, 0x20dc7fbd, 0x000000dc, 0xbf800000 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0480, 0x000000dc }, + { 0x00800048, 0x224077bd, 0x008d0440, 0x000000d8 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g7a b/src/shaders/render/exa_wm_yuv_color_balance.g7a new file mode 100644 index 0000000..6906357 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g7a @@ -0,0 +1,38 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, 
modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Haihao Xiang <haihao.xiang@intel.com> + * + */ + +include(`exa_wm.g4i') + +/* Color Balance parameters */ +define(`skip_color_balance', `g6.2<0,1,0>uw') +define(`contrast', `g6.16<0,1,0>f') +define(`brightness', `g6.20<0,1,0>f') +define(`cos_c_s', `g6.24<0,1,0>f') +define(`sin_c_s', `g6.28<0,1,0>f') +define(`sin_c_s_t', `g6.28') + +include(`exa_wm_yuv_color_balance.gxa') diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g7b b/src/shaders/render/exa_wm_yuv_color_balance.g7b new file mode 100644 index 0000000..0a9e6b9 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g7b @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x000000c2, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000001a }, + { 0x00800040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00800041, 0x240077bd, 0x008d0400, 0x000000d0 }, + { 0x00800040, 0x240077bd, 0x008d0400, 0x000000d4 }, + { 0x00800040, 0x21c07fbd, 0x008d0400, 0x3d808081 }, + { 0x00800040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00800040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 
0x240077bc, 0x008d0440, 0x000000dc }, + { 0x00800048, 0x220077bd, 0x008d0480, 0x000000d8 }, + { 0x00000041, 0x20dc7fbd, 0x000000dc, 0xbf800000 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0480, 0x000000dc }, + { 0x00800048, 0x224077bd, 0x008d0440, 0x000000d8 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell b/src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell new file mode 100644 index 0000000..2780c08 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g7b.haswell @@ -0,0 +1,15 @@ + { 0x01000010, 0x20002d3c, 0x000000c2, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000000d0 }, + { 0x00800040, 0x24007fbd, 0x008d01c0, 0xbd808081 }, + { 0x00800041, 0x240077bd, 0x008d0400, 0x000000d0 }, + { 0x00800040, 0x240077bd, 0x008d0400, 0x000000d4 }, + { 0x00800040, 0x21c07fbd, 0x008d0400, 0x3d808081 }, + { 0x00800040, 0x24807fbd, 0x008d0200, 0xbf008084 }, + { 0x00800040, 0x24407fbd, 0x008d0240, 0xbf008084 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0440, 0x000000dc }, + { 0x00800048, 0x220077bd, 0x008d0480, 0x000000d8 }, + { 0x00000041, 0x20dc7fbd, 0x000000dc, 0xbf800000 }, + { 0x00800001, 0x240003fc, 0x00000000, 0x3f008084 }, + { 0x00800048, 0x240077bc, 0x008d0480, 0x000000dc }, + { 0x00800048, 0x224077bd, 0x008d0440, 0x000000d8 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g8a b/src/shaders/render/exa_wm_yuv_color_balance.g8a new file mode 100644 index 0000000..f3cc28f --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g8a @@ -0,0 +1,39 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell 
copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Haihao Xiang <haihao.xiang@intel.com> + * Zhao Yakui <yakui.zhao@intel.com> + * + */ + +include(`exa_wm.g4i') + +/* Color Balance parameters */ +define(`skip_color_balance', `g6.2<0,1,0>uw') +define(`contrast', `g6.16<0,1,0>f') +define(`brightness', `g6.20<0,1,0>f') +define(`cos_c_s', `g6.24<0,1,0>f') +define(`sin_c_s', `g6.28<0,1,0>f') +define(`sin_c_s_t', `g6.28') + +include(`exa_wm_yuv_color_balance.gxa') diff --git a/src/shaders/render/exa_wm_yuv_color_balance.g8b b/src/shaders/render/exa_wm_yuv_color_balance.g8b new file mode 100644 index 0000000..5dc2c8b --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.g8b @@ -0,0 +1,15 @@ + { 0x01000010, 0x200012e0, 0x160000c2, 0x00010001 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000d0 }, + { 0x00800040, 0x24003ae8, 0x3e8d01c0, 0xbd808081 }, + { 0x00800041, 0x24003ae8, 0x3a8d0400, 0x000000d0 }, + { 0x00800040, 0x24003ae8, 0x3a8d0400, 0x000000d4 }, + { 0x00800040, 0x21c03ae8, 0x3e8d0400, 0x3d808081 }, + { 0x00800040, 0x24803ae8, 0x3e8d0200, 0xbf008084 }, + { 0x00800040, 0x24403ae8, 0x3e8d0240, 0xbf008084 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x3f008084 }, + { 0x00800048, 0x24003ae0, 0x3a8d0440, 
0x000000dc }, + { 0x00800048, 0x22003ae8, 0x3a8d0480, 0x000000d8 }, + { 0x00000041, 0x20dc3ae8, 0x3e0000dc, 0xbf800000 }, + { 0x00800001, 0x24003ee0, 0x38000000, 0x3f008084 }, + { 0x00800048, 0x24003ae0, 0x3a8d0480, 0x000000dc }, + { 0x00800048, 0x22403ae8, 0x3a8d0440, 0x000000d8 }, diff --git a/src/shaders/render/exa_wm_yuv_color_balance.gxa b/src/shaders/render/exa_wm_yuv_color_balance.gxa new file mode 100644 index 0000000..948067c --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_color_balance.gxa @@ -0,0 +1,75 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Haihao Xiang <haihao.xiang@intel.com> + * + */ + +define(`Cr', `src_sample_b') +define(`Cr_01', `src_sample_b_01') +define(`Cr_23', `src_sample_b_23') + +define(`Y', `src_sample_r') +define(`Y_01', `src_sample_r_01') +define(`Y_23', `src_sample_r_23') + +define(`Cb', `src_sample_g') +define(`Cb_01', `src_sample_g_01') +define(`Cb_23', `src_sample_g_23') + +define(`Crn', `color_balance_g') +define(`Crn_01', `color_balance_g_01') +define(`Crn_23', `color_balance_g_23') + +define(`Yn', `color_balance_r') +define(`Yn_01', `color_balance_r_01') +define(`Yn_23', `color_balance_r_23') + +define(`Cbn', `color_balance_b') +define(`Cbn_01', `color_balance_b_01') +define(`Cbn_23', `color_balance_b_23') + +cmp.e.f0.0 (1) null skip_color_balance 0x1uw {align1}; +(f0.0) jmpi _DONE_COLOR_BALANCE; + +/* Yout = (Yin - 16 / 255) * contrast + brightness + 16 / 255 */ +add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; +mul (16) Yn<1>F Yn<8,8,1>F contrast { compr align1 }; +add (16) Yn<1>F Yn<8,8,1>F brightness { compr align1 }; +add (16) Y<1>F Yn<8,8,1>F 0.0627451F { compr align1 }; + +/* Uout = (Uin - 128 / 255) * cos_c_s + (Vin - 128 / 255) * sin_c_s + 128 / 255 */ +/* Vout = (Vin - 128 / 255) * cos_c_s - (Uin - 128 / 255) * sin_c_s + 128 / 255 */ +add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; +add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; + +mov (16) acc0<1>F 0.501961F { compr align1 }; +mac (16) acc0<1>F Crn<8,8,1>F sin_c_s { compr align1 }; +mac (16) Cb<1>F Cbn<8,8,1>F cos_c_s { compr align1 }; + +mul (1) sin_c_s_t<1>F sin_c_s -1.0F { align1}; +mov (16) acc0<1>F 0.501961F { compr align1 }; +mac (16) acc0<1>F Cbn<8,8,1>F sin_c_s { compr align1 }; +mac (16) Cr<1>F Crn<8,8,1>F cos_c_s { compr align1 }; + +_DONE_COLOR_BALANCE: diff --git a/src/shaders/render/exa_wm_yuv_rgb.g4a b/src/shaders/render/exa_wm_yuv_rgb.g4a index b3abe4b..e3d2464 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g4a +++ b/src/shaders/render/exa_wm_yuv_rgb.g4a @@ 
-1,5 +1,5 @@ /* - * Copyright © 2006 Intel Corporation + * Copyright © 2006-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,72 +27,6 @@ */ include(`exa_wm.g4i') +include(`exa_yuv_gen4.g4i') +include(`exa_yuv_rgb.gxa') -define(`YCbCr_base', `src_sample_base') - -define(`Cr', `src_sample_b') -define(`Cr_01', `src_sample_b_01') -define(`Cr_23', `src_sample_b_23') - -define(`Y', `src_sample_r') -define(`Y_01', `src_sample_r_01') -define(`Y_23', `src_sample_r_23') - -define(`Cb', `src_sample_g') -define(`Cb_01', `src_sample_g_01') -define(`Cb_23', `src_sample_g_23') - -define(`Crn', `mask_sample_g') -define(`Crn_01', `mask_sample_g_01') -define(`Crn_23', `mask_sample_g_23') - -define(`Yn', `mask_sample_r') -define(`Yn_01', `mask_sample_r_01') -define(`Yn_23', `mask_sample_r_23') - -define(`Cbn', `mask_sample_b') -define(`Cbn_01', `mask_sample_b_01') -define(`Cbn_23', `mask_sample_b_23') - - /* color space conversion function: - * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) - * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) - * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) - */ - - /* Normalize Y, Cb and Cr: - * - * Yn = (Y - 16/255) * 1.164 - * Crn = Cr - 128 / 255 - * Cbn = Cb - 128 / 255 - */ -add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; -mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 }; - -add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; - -add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; - - /* - * R = Y + Cr * 1.596 - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 }; - - /* - * G = Crn * -0.813 + Cbn * -0.392 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 }; -mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 }; - 
- /* - * B = Cbn * 2.017 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 }; - - /* - * A = 1.0 - */ -mov (16) src_sample_a<1>F 1.0F { compr align1 }; diff --git a/src/shaders/render/exa_wm_yuv_rgb.g4b b/src/shaders/render/exa_wm_yuv_rgb.g4b index 6b99838..b116ece 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g4b +++ b/src/shaders/render/exa_wm_yuv_rgb.g4b @@ -1,12 +1,13 @@ - { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbd808081 }, - { 0x00802041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 }, - { 0x00802040, 0x23007fbd, 0x008d0240, 0xbf008084 }, - { 0x00802040, 0x23407fbd, 0x008d0200, 0xbf008084 }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80802048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x00802048, 0x24007fbc, 0x008d0300, 0xbf5020c5 }, - { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00802040, 0x22c077bd, 0x008d01c0, 0x0000006c }, + { 0x00802040, 0x230077bd, 0x008d0200, 0x0000007c }, + { 0x00802040, 0x234077bd, 0x008d0240, 0x0000008c }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000060 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000064 }, + { 0x80802048, 0x21c077bd, 0x008d0340, 0x00000068 }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000070 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000074 }, + { 0x80802048, 0x220077bd, 0x008d0340, 0x00000078 }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000080 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000084 }, + { 0x80802048, 0x224077bd, 0x008d0340, 0x00000088 }, { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 b/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 index 6b99838..b116ece 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 +++ b/src/shaders/render/exa_wm_yuv_rgb.g4b.gen5 @@ -1,12 +1,13 @@ - { 
0x00802040, 0x22c07fbd, 0x008d01c0, 0xbd808081 }, - { 0x00802041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 }, - { 0x00802040, 0x23007fbd, 0x008d0240, 0xbf008084 }, - { 0x00802040, 0x23407fbd, 0x008d0200, 0xbf008084 }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80802048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x00802048, 0x24007fbc, 0x008d0300, 0xbf5020c5 }, - { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, - { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00802040, 0x22c077bd, 0x008d01c0, 0x0000006c }, + { 0x00802040, 0x230077bd, 0x008d0200, 0x0000007c }, + { 0x00802040, 0x234077bd, 0x008d0240, 0x0000008c }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000060 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000064 }, + { 0x80802048, 0x21c077bd, 0x008d0340, 0x00000068 }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000070 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000074 }, + { 0x80802048, 0x220077bd, 0x008d0340, 0x00000078 }, + { 0x00802041, 0x240077bc, 0x008d02c0, 0x00000080 }, + { 0x00802048, 0x240077bc, 0x008d0300, 0x00000084 }, + { 0x80802048, 0x224077bd, 0x008d0340, 0x00000088 }, { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/src/shaders/render/exa_wm_yuv_rgb.g6a b/src/shaders/render/exa_wm_yuv_rgb.g6a index b3abe4b..ede0298 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g6a +++ b/src/shaders/render/exa_wm_yuv_rgb.g6a @@ -1,5 +1,5 @@ /* - * Copyright © 2006 Intel Corporation + * Copyright © 2006-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,72 +27,5 @@ */ include(`exa_wm.g4i') - -define(`YCbCr_base', `src_sample_base') - -define(`Cr', `src_sample_b') -define(`Cr_01', `src_sample_b_01') -define(`Cr_23', `src_sample_b_23') - -define(`Y', `src_sample_r') -define(`Y_01', 
`src_sample_r_01') -define(`Y_23', `src_sample_r_23') - -define(`Cb', `src_sample_g') -define(`Cb_01', `src_sample_g_01') -define(`Cb_23', `src_sample_g_23') - -define(`Crn', `mask_sample_g') -define(`Crn_01', `mask_sample_g_01') -define(`Crn_23', `mask_sample_g_23') - -define(`Yn', `mask_sample_r') -define(`Yn_01', `mask_sample_r_01') -define(`Yn_23', `mask_sample_r_23') - -define(`Cbn', `mask_sample_b') -define(`Cbn_01', `mask_sample_b_01') -define(`Cbn_23', `mask_sample_b_23') - - /* color space conversion function: - * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) - * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) - * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) - */ - - /* Normalize Y, Cb and Cr: - * - * Yn = (Y - 16/255) * 1.164 - * Crn = Cr - 128 / 255 - * Cbn = Cb - 128 / 255 - */ -add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; -mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 }; - -add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; - -add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; - - /* - * R = Y + Cr * 1.596 - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 }; - - /* - * G = Crn * -0.813 + Cbn * -0.392 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 }; -mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 }; - - /* - * B = Cbn * 2.017 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 }; - - /* - * A = 1.0 - */ -mov (16) src_sample_a<1>F 1.0F { compr align1 }; +include(`exa_yuv_gen6.g4i') +include(`exa_yuv_rgb.gxa') diff --git a/src/shaders/render/exa_wm_yuv_rgb.g6b b/src/shaders/render/exa_wm_yuv_rgb.g6b index 6c8c724..d09ae00 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g6b +++ b/src/shaders/render/exa_wm_yuv_rgb.g6b @@ -1,12 +1,13 @@ - { 0x00800040, 0x22c07fbd, 
0x008d01c0, 0xbd808081 }, - { 0x00800041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 }, - { 0x00800040, 0x23007fbd, 0x008d0240, 0xbf008084 }, - { 0x00800040, 0x23407fbd, 0x008d0200, 0xbf008084 }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80800048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x00800048, 0x24007fbc, 0x008d0300, 0xbf5020c5 }, - { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00800040, 0x22c077bd, 0x008d01c0, 0x000000ec }, + { 0x00800040, 0x230077bd, 0x008d0200, 0x000000fc }, + { 0x00800040, 0x234077bd, 0x008d0240, 0x0000010c }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000e0 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x000000e4 }, + { 0x80800048, 0x21c077bd, 0x008d0340, 0x000000e8 }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000f0 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x000000f4 }, + { 0x80800048, 0x220077bd, 0x008d0340, 0x000000f8 }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x00000100 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x00000104 }, + { 0x80800048, 0x224077bd, 0x008d0340, 0x00000108 }, { 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/src/shaders/render/exa_wm_yuv_rgb.g7a b/src/shaders/render/exa_wm_yuv_rgb.g7a index 5cd33e2..ede0298 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g7a +++ b/src/shaders/render/exa_wm_yuv_rgb.g7a @@ -1,5 +1,5 @@ /* - * Copyright © 2006 Intel Corporation + * Copyright © 2006-2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,72 +27,5 @@ */ include(`exa_wm.g4i') - -define(`YCbCr_base', `src_sample_base') - -define(`Cr', `src_sample_b') -define(`Cr_01', `src_sample_b_01') -define(`Cr_23', `src_sample_b_23') - -define(`Y', `src_sample_r') -define(`Y_01', `src_sample_r_01') 
-define(`Y_23', `src_sample_r_23') - -define(`Cb', `src_sample_g') -define(`Cb_01', `src_sample_g_01') -define(`Cb_23', `src_sample_g_23') - -define(`Crn', `mask_sample_g') -define(`Crn_01', `mask_sample_g_01') -define(`Crn_23', `mask_sample_g_23') - -define(`Yn', `mask_sample_r') -define(`Yn_01', `mask_sample_r_01') -define(`Yn_23', `mask_sample_r_23') - -define(`Cbn', `mask_sample_b') -define(`Cbn_01', `mask_sample_b_01') -define(`Cbn_23', `mask_sample_b_23') - - /* color space conversion function: - * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) - * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) - * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) - */ - - /* Normalize Y, Cb and Cr: - * - * Yn = (Y - 16/255) * 1.164 - * Crn = Cr - 128 / 255 - * Cbn = Cb - 128 / 255 - */ -add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; -mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 }; - -add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; - -add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; - - /* - * R = Y + Cr * 1.596 - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 }; - - /* - * G = Crn * -0.813 + Cbn * -0.392 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 }; -mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 }; - - /* - * B = Cbn * 2.017 + Y - */ -mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; -mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 }; - - /* - * A = 1.0 - */ -mov (16) src_sample_a<1>F 1.0F { compr align1 }; +include(`exa_yuv_gen6.g4i') +include(`exa_yuv_rgb.gxa') diff --git a/src/shaders/render/exa_wm_yuv_rgb.g7b b/src/shaders/render/exa_wm_yuv_rgb.g7b index 6c8c724..d09ae00 100644 --- a/src/shaders/render/exa_wm_yuv_rgb.g7b +++ b/src/shaders/render/exa_wm_yuv_rgb.g7b @@ -1,12 +1,13 @@ - { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbd808081 }, - { 
0x00800041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 }, - { 0x00800040, 0x23007fbd, 0x008d0240, 0xbf008084 }, - { 0x00800040, 0x23407fbd, 0x008d0200, 0xbf008084 }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80800048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x00800048, 0x24007fbc, 0x008d0300, 0xbf5020c5 }, - { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, - { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 }, - { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00800040, 0x22c077bd, 0x008d01c0, 0x000000ec }, + { 0x00800040, 0x230077bd, 0x008d0200, 0x000000fc }, + { 0x00800040, 0x234077bd, 0x008d0240, 0x0000010c }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000e0 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x000000e4 }, + { 0x80800048, 0x21c077bd, 0x008d0340, 0x000000e8 }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x000000f0 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x000000f4 }, + { 0x80800048, 0x220077bd, 0x008d0340, 0x000000f8 }, + { 0x00800041, 0x240077bc, 0x008d02c0, 0x00000100 }, + { 0x00800048, 0x240077bc, 0x008d0300, 0x00000104 }, + { 0x80800048, 0x224077bd, 0x008d0340, 0x00000108 }, { 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/src/shaders/render/exa_wm_yuv_rgb.g8a b/src/shaders/render/exa_wm_yuv_rgb.g8a new file mode 100644 index 0000000..9da53c8 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_rgb.g8a @@ -0,0 +1,32 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Keith Packard <keithp@keithp.com> + * Eric Anholt <eric@anholt.net> + * Zhao Yakui <yakui.zhao@intel.com> + * + */ + +include(`exa_wm.g4i') +include(`exa_yuv_gen6.g4i') +include(`exa_yuv_rgb.gxa') diff --git a/src/shaders/render/exa_wm_yuv_rgb.g8b b/src/shaders/render/exa_wm_yuv_rgb.g8b new file mode 100644 index 0000000..6b6b4d1 --- /dev/null +++ b/src/shaders/render/exa_wm_yuv_rgb.g8b @@ -0,0 +1,13 @@ + { 0x00800040, 0x22c03ae8, 0x3a8d01c0, 0x000000ec }, + { 0x00800040, 0x23003ae8, 0x3a8d0200, 0x000000fc }, + { 0x00800040, 0x23403ae8, 0x3a8d0240, 0x0000010c }, + { 0x00800041, 0x24003ae0, 0x3a8d02c0, 0x000000e0 }, + { 0x00800048, 0x24003ae0, 0x3a8d0300, 0x000000e4 }, + { 0x80800048, 0x21c03ae8, 0x3a8d0340, 0x000000e8 }, + { 0x00800041, 0x24003ae0, 0x3a8d02c0, 0x000000f0 }, + { 0x00800048, 0x24003ae0, 0x3a8d0300, 0x000000f4 }, + { 0x80800048, 0x22003ae8, 0x3a8d0340, 0x000000f8 }, + { 0x00800041, 0x24003ae0, 0x3a8d02c0, 0x00000100 }, + { 0x00800048, 0x24003ae0, 0x3a8d0300, 0x00000104 }, + { 0x80800048, 0x22403ae8, 0x3a8d0340, 0x00000108 }, + { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 }, diff --git a/src/shaders/render/exa_yuv_gen4.g4i b/src/shaders/render/exa_yuv_gen4.g4i new file mode 100644 index 0000000..5a66616 --- /dev/null +++ b/src/shaders/render/exa_yuv_gen4.g4i @@ -0,0 +1,42 @@ +/* + * Copyright © 2013 Intel Corporation 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + */ + +/* YUV to RGB matrix coeff */ + +define(`coef_ry', `g3.0<0,1,0>F') +define(`coef_ru', `g3.4<0,1,0>F') +define(`coef_rv', `g3.8<0,1,0>F') +define(`coef_yd', `g3.12<0,1,0>F') + +define(`coef_gy', `g3.16<0,1,0>F') +define(`coef_gu', `g3.20<0,1,0>F') +define(`coef_gv', `g3.24<0,1,0>F') +define(`coef_ud', `g3.28<0,1,0>F') + +define(`coef_by', `g4.0<0,1,0>F') +define(`coef_bu', `g4.4<0,1,0>F') +define(`coef_bv', `g4.8<0,1,0>F') +define(`coef_vd', `g4.12<0,1,0>F') diff --git a/src/shaders/render/exa_yuv_gen6.g4i b/src/shaders/render/exa_yuv_gen6.g4i new file mode 100644 index 0000000..a8d69ee --- /dev/null +++ b/src/shaders/render/exa_yuv_gen6.g4i @@ -0,0 +1,42 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + */ +/* YUV to RGB matrix coeff */ + + +define(`coef_ry', `g7.0<0,1,0>F') +define(`coef_ru', `g7.4<0,1,0>F') +define(`coef_rv', `g7.8<0,1,0>F') +define(`coef_yd', `g7.12<0,1,0>F') + +define(`coef_gy', `g7.16<0,1,0>F') +define(`coef_gu', `g7.20<0,1,0>F') +define(`coef_gv', `g7.24<0,1,0>F') +define(`coef_ud', `g7.28<0,1,0>F') + +define(`coef_by', `g8.0<0,1,0>F') +define(`coef_bu', `g8.4<0,1,0>F') +define(`coef_bv', `g8.8<0,1,0>F') +define(`coef_vd', `g8.12<0,1,0>F') diff --git a/src/shaders/render/exa_yuv_rgb.gxa b/src/shaders/render/exa_yuv_rgb.gxa new file mode 100644 index 0000000..656ae73 --- /dev/null +++ b/src/shaders/render/exa_yuv_rgb.gxa @@ -0,0 +1,74 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + */ + +define(`YCbCr_base', `src_sample_base') + +define(`Cr', `src_sample_b') +define(`Cr_01', `src_sample_b_01') +define(`Cr_23', `src_sample_b_23') + +define(`Y', `src_sample_r') +define(`Y_01', `src_sample_r_01') +define(`Y_23', `src_sample_r_23') + +define(`Cb', `src_sample_g') +define(`Cb_01', `src_sample_g_01') +define(`Cb_23', `src_sample_g_23') + +define(`Crn', `mask_sample_b') +define(`Crn_01', `mask_sample_b_01') +define(`Crn_23', `mask_sample_b_23') + +define(`Yn', `mask_sample_r') +define(`Yn_01', `mask_sample_r_01') +define(`Yn_23', `mask_sample_r_23') + +define(`Cbn', `mask_sample_g') +define(`Cbn_01', `mask_sample_g_01') +define(`Cbn_23', `mask_sample_g_23') + +add (16) Yn<1>F Y<8,8,1>F coef_yd { compr align1 }; + +add (16) Cbn<1>F Cb<8,8,1>F coef_ud { compr align1 }; + +add (16) Crn<1>F Cr<8,8,1>F coef_vd { compr align1 }; + +mul (16) acc0<1>F Yn<8,8,1>F coef_ry { compr align1 }; +mac (16) acc0<1>F Cbn<8,8,1>F coef_ru { compr align1 }; +mac.sat (16) src_sample_r<1>F Crn<8,8,1>F coef_rv { compr align1 }; + +mul (16) acc0<1>F Yn<8,8,1>F coef_gy { compr align1 }; +mac (16) acc0<1>F Cbn<8,8,1>F coef_gu { compr align1 }; +mac.sat(16) src_sample_g<1>F Crn<8,8,1>F coef_gv { compr align1 }; + +mul (16) acc0<1>F Yn<8,8,1>F coef_by { compr align1 }; +mac (16) acc0<1>F Cbn<8,8,1>F coef_bu { compr align1 }; +mac.sat(16) src_sample_b<1>F Crn<8,8,1>F coef_bv { compr align1 }; + + /* + * A = 1.0 + */ +mov (16) src_sample_a<1>F 1.0F { compr align1 }; diff --git a/src/shaders/utils/Makefile.am b/src/shaders/utils/Makefile.am index b8f3121..dd19d62 100644 --- a/src/shaders/utils/Makefile.am +++ b/src/shaders/utils/Makefile.am @@ -6,6 +6,9 @@ MFC_CORE_AVC = \ mfc_batchbuffer_avc_intra.asm \ mfc_batchbuffer_avc_inter.asm +MFC_CORE_HSW = \ + mfc_batchbuffer_hsw.asm + INTEL_G6B = mfc_batchbuffer_avc_intra.g6b mfc_batchbuffer_avc_inter.g6b INTEL_G6A = mfc_batchbuffer_avc_intra.g6a mfc_batchbuffer_avc_inter.g6a 
INTEL_GEN6_INC = mfc_batchbuffer.inc @@ -16,15 +19,21 @@ INTEL_G7A = mfc_batchbuffer_avc_intra.g7a mfc_batchbuffer_avc_inter.g7a INTEL_GEN7_INC = mfc_batchbuffer.inc INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm) +INTEL_G75B = mfc_batchbuffer_hsw.g75b +INTEL_G75A = mfc_batchbuffer_hsw.g75a +INTEL_GEN75_INC = mfc_batchbuffer_hsw.inc +INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) + TARGETS = if HAVE_GEN4ASM TARGETS += $(INTEL_G6B) TARGETS += $(INTEL_G7B) +TARGETS += $(INTEL_G75B) endif all-local: $(TARGETS) -SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm +SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm if HAVE_GEN4ASM $(INTEL_GEN6_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN6_INC) @@ -42,19 +51,31 @@ $(INTEL_GEN7_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN7_INC) rm _mfc0.$@ .gen7.asm.g7b: $(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $< + +$(INTEL_GEN75_ASM): $(MFC_CORE_HSW) $(INTEL_GEN75_INC) +.g75a.gen75.asm: + $(AM_V_GEN)cpp -P $< > _mfc0.$@ && \ + m4 _mfc0.$@ > $@ && \ + rm _mfc0.$@ +.gen75.asm.g75b: + $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $< endif -CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) +CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM) EXTRA_DIST = \ $(INTEL_G6A) \ $(INTEL_G6B) \ $(INTEL_G7A) \ $(INTEL_G7B) \ + $(INTEL_G75A) \ + $(INTEL_G75B) \ $(INTEL_GEN6_INC) \ $(INTEL_GEN7_INC) \ + $(INTEL_GEN75_INC) \ $(MFC_CORE) \ $(MFC_CORE_AVC) \ + $(MFC_CORE_HSW) \ $(NULL) # Extra clean files so that maintainer-clean removes *everything* diff --git a/src/shaders/utils/mfc_batchbuffer.inc b/src/shaders/utils/mfc_batchbuffer.inc index c83d5d4..c3a0fec 100644 --- a/src/shaders/utils/mfc_batchbuffer.inc +++ b/src/shaders/utils/mfc_batchbuffer.inc @@ -139,6 +139,8 @@ define(`mb_y', `inline_reg0.17') /* :ub, */ define(`mb_xy', `inline_reg0.16') /* :uw, */ define(`width_in_mb', `inline_reg0.20') /* :uw, the picture width in macroblocks */ define(`qp', `inline_reg0.22') /* :ub, */ +define(`ref_idx0', `inline_reg0.24') /* :ud */ 
+define(`ref_idx1', `inline_reg0.28') /* :ud */ /* * GRF 8~15 -- temporary registers diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.asm b/src/shaders/utils/mfc_batchbuffer_avc_inter.asm index 59152b8..549f021 100644 --- a/src/shaders/utils/mfc_batchbuffer_avc_inter.asm +++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.asm @@ -103,6 +103,12 @@ __FILL_INTER_PAK_COMMAND: /* DW7 */ mov (1) pak_object7_ud<1>:ud ob_read_wb0.4<0,1,0>:ud {align1} ; + /* DW8 */ + mov (1) pak_object8_ud<1>:ud ref_idx0<0,1,0>:ud {align1} ; + + /* DW9 */ + mov (1) pak_object9_ud<1>:ud ref_idx1<0,1,0>:ud {align1} ; + jmpi (1) __OUTPUT_PAK_COMMAND ; __FILL_INTRA_PAK_COMMAND: diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b b/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b index 2e1703e..24b268f 100644 --- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b +++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b @@ -24,7 +24,7 @@ { 0x00800001, 0x23400061, 0x00000000, 0x00000000 }, { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 }, { 0x01000005, 0x20000c20, 0x00000200, 0x00002000 }, - { 0x00110020, 0x34001c00, 0x00001400, 0x00000022 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000026 }, { 0x00000001, 0x23400061, 0x00000000, 0x71490009 }, { 0x00000041, 0x23480c21, 0x000001e0, 0x000000a0 }, { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, @@ -41,6 +41,8 @@ { 0x00110001, 0x23580061, 0x00000000, 0x00000000 }, { 0x00000040, 0x23584421, 0x00000358, 0x000000b6 }, { 0x00000001, 0x235c0021, 0x00000204, 0x00000000 }, + { 0x00000001, 0x23600021, 0x000000b8, 0x00000000 }, + { 0x00000001, 0x23640021, 0x000000bc, 0x00000000 }, { 0x00000020, 0x34001c00, 0x00001400, 0x00000022 }, { 0x00000001, 0x23400061, 0x00000000, 0x71490009 }, { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, @@ -67,7 +69,7 @@ { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 }, { 0x00000040, 0x21e00c21, 0x000001e0, 0x00000001 }, { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff }, - { 0x00110020, 0x34001c00, 
0x00001400, 0xffffff9e }, + { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9a }, { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e }, { 0x00600001, 0x20000022, 0x008d0120, 0x00000000 }, { 0x05800031, 0x22001cc9, 0x00000000, 0x021a0001 }, diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b b/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b index 1664010..f0e2012 100644 --- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b +++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b @@ -24,7 +24,7 @@ { 0x00800001, 0x23400061, 0x00000000, 0x00000000 }, { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 }, { 0x01000005, 0x20000c20, 0x00000200, 0x00002000 }, - { 0x00110020, 0x34001c00, 0x00001400, 0x00000022 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000026 }, { 0x00000001, 0x23400061, 0x00000000, 0x71490009 }, { 0x00000041, 0x23480c21, 0x000001e0, 0x000000a0 }, { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, @@ -41,6 +41,8 @@ { 0x00110001, 0x23580061, 0x00000000, 0x00000000 }, { 0x00000040, 0x23584421, 0x00000358, 0x000000b6 }, { 0x00000001, 0x235c0021, 0x00000204, 0x00000000 }, + { 0x00000001, 0x23600021, 0x000000b8, 0x00000000 }, + { 0x00000001, 0x23640021, 0x000000bc, 0x00000000 }, { 0x00000020, 0x34001c00, 0x00001400, 0x00000022 }, { 0x00000001, 0x23400061, 0x00000000, 0x71490009 }, { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, @@ -67,7 +69,7 @@ { 0x00000040, 0x21480c21, 0x00000148, 0x00000004 }, { 0x00000040, 0x21e00c21, 0x000001e0, 0x00000001 }, { 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff }, - { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9e }, + { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9a }, { 0x00010020, 0x34001c00, 0x02001400, 0x0000001e }, { 0x00600001, 0x28000021, 0x008d0120, 0x00000000 }, { 0x0a800031, 0x22001ca9, 0x00000800, 0x02180001 }, diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.asm b/src/shaders/utils/mfc_batchbuffer_hsw.asm new file mode 100644 index 0000000..c34e934 --- /dev/null +++ 
b/src/shaders/utils/mfc_batchbuffer_hsw.asm @@ -0,0 +1,296 @@ +/* + * Copyright © 2010-2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + */ + +START: + mov (16) pak_object_reg0.0<1>:ud 0x0:ud {align1}; + mov (8) obw_m0.0<1>:ud 0x0:ud {align1}; + mov (8) mb_cur_msg.0<1>:ud 0x0:ud {align1}; + mov (16) mb_temp.0<1>:ud 0x0:ud {align1}; + mov (1) cur_mb_x<1>:uw mb_x<0,1,0>:ub {align1}; + mov (1) cur_mb_y<1>:uw mb_y<0,1,0>:ub {align1}; + mov (1) end_mb_x<1>:uw slice_end_x<0,1,0>:ub {align1}; + mov (1) end_mb_y<1>:uw slice_end_y<0,1,0>:ub {align1}; + mov (1) end_loop_count<1>:uw total_mbs<0,1,0>:uw {align1}; + mov (1) vme_len<1>:ud 2:ud {align1}; + and.z.f0.0 (1) null:uw mb_flag<0,1,0>:ub INTRA_SLICE:uw {align1}; + (f0.0) mov (1) vme_len<1>:ud 24:ud {align1}; + + mov (1) obw_m0.8<1>:UD buffer_offset<0,1,0>:ud {align1}; + mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + + mul (1) mb_cur_msg.8<1>:UD width_in_mbs<0,1,0>:UW cur_mb_y<0,1,0>:UW {align1}; + add (1) mb_cur_msg.8<1>:UD mb_cur_msg.8<0,1,0>:UD cur_mb_x<0,1,0>:uw {align1}; + mul (1) mb_cur_msg.8<1>:UD mb_cur_msg.8<0,1,0>:UD vme_len<0,1,0>:UD {align1}; + mov (1) mb_cur_msg.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + mov (1) pak_object0_ud<1>:ud MFC_AVC_PAK_OBJECT_DW0:ud {align1}; + mov (1) pak_object5_ud<1>:ud MFC_AVC_PAK_OBJECT_DW5:ud {align1}; + mov (1) pak_object10_ud<1>:ud MFC_AVC_PAK_OBJECT_DW10:ud {align1}; + mov (1) pak_object6_ud<1>:ub qp_flag<0,1,0>:ub {align1}; + +pak_object_loop: + mov (8) mb_msg0.0<1>:ud mb_cur_msg.0<8,8,1>:ud {align1}; + mov (1) pak_object4_ud<1>:ud MFC_AVC_PAK_OBJECT_DW4:ud {align1}; + mov (1) tmp_reg0.0<1>:ub cur_mb_x<0,1,0>:ub {align1}; + mov (1) tmp_reg0.1<1>:ub cur_mb_y<0,1,0>:ub {align1}; + mov (1) pak_object4_ud<1>:uw tmp_reg0.0<0,1,0>:uw {align1}; + /* pak_object6_ud */ + mov (1) pak_object_reg0.26<1>:uw 0x0:uw {align1}; + + cmp.e.f0.0 (1) null:uw cur_mb_x<0,1,0>:uw end_mb_x<0,1,0>:uw {align1}; + (-f0.0) jmpi (1) start_mb_flag; + cmp.e.f0.0 (1) null:uw cur_mb_y<0,1,0>:uw end_mb_y<0,1,0>:uw {align1}; + (f0.0) mov (1) 
pak_object_reg0.26<1>:uw MFC_AVC_PAK_LAST_MB:uw {align1}; +start_mb_flag: + and.z.f0.0 (1) null:uw mb_flag<0,1,0>:ub INTRA_SLICE:uw {align1}; + (f0.0) jmpi (1) inter_frame_start; + +/* bind index 0, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + MV_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + jmpi (1) intra_pak_command; + +nop; +nop; +inter_frame_start: +/* bind index 0, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + MV_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) jmpi (1) intra_pak_command; + +/* MV len and MV mode */ + and (1) pak_object3_ud<1>:ud mb_inter_wb.0<0,1,0>:ud MFC_AVC_INTER_MASK_DW3:ud {align1}; + add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud MFC_AVC_PAK_CBP:ud {align1}; + and (1) tmp_reg0.0<1>:uw mb_inter_wb.0<0,1,0>:uw INTER_MASK:uw {align1}; + mov (1) pak_object1_ud<1>:ud 32:ud {align1}; + cmp.e.f0.0 (1) null:uw tmp_reg0.0<0,1,0>:uw INTER_8X8MODE:uw {align1}; + (-f0.0) add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV8:ud {align1}; + (-f0.0) jmpi (1) inter_mv_check; + and.nz.f0.0 (1) null:ud mb_inter_wb.4<0,1,0>:uw SUBSHAPE_MASK:uw {align1}; + (f0.0) mov (1) pak_object1_ud<1>:ud 128:ud {align1}; + (f0.0) add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV32:ud {align1}; + (f0.0) jmpi (1) mv_check_end; + + add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV8:ud {align1}; + +inter_mv_check: + and (1) tmp_reg0.0<1>:uw mb_inter_wb.0<0,1,0>:uw INTER_MASK:uw {align1}; + cmp.e.f0.0 (1) null:uw tmp_reg0.0<0,1,0>:uw 
INTER_16X16MODE:uw {align1}; + (f0.0) jmpi (1) mv_check_end; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 0, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + MV_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ + + mov (2) mb_mv0.8<1>:ud mb_mv1.0<2,2,1>:ud {align1}; + mov (2) mb_mv0.16<1>:ud mb_mv2.0<2,2,1>:ud {align1}; + mov (2) mb_mv0.24<1>:ud mb_mv3.0<2,2,1>:ud {align1}; + + mov (8) msg_reg0.0<1>:ud mb_msg0.0<8,8,1>:ud {align1} ; + mov (8) msg_reg1.0<1>:ud mb_mv0.0<8,8,1>:ud {align1} ; +/* Write MV for MB A */ +/* bind index 0, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + MV_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +mv_check_end: + +/* ref list */ + mov (1) pak_object8_ud<1>:ud fwd_ref<0,1,0>:ud {align1}; + mov (1) pak_object9_ud<1>:ud bwd_ref<0,1,0>:ud {align1}; +/* inter_mode. 
pak_object7_ud */ + mov (1) pak_object7_ud<1>:ud 0x0:ud {align1}; + mov (1) pak_object_reg0.28<1>:ub mb_inter_wb.5<0,1,0>:ub {align1}; + mov (1) pak_object_reg0.29<1>:ub mb_inter_wb.6<0,1,0>:ub {align1}; + +/* mv start address */ + add (1) tmp_reg0.4<1>:ud mb_cur_msg.8<0,1,0>:ud 3:ud {align1}; + mul (1) pak_object2_ud<1>:ud tmp_reg0.4<0,1,0>:ud 16:ud {align1}; + + jmpi (1) write_pak_command; + +intra_pak_command: + /* object 1/2 is set to zero */ + mov (2) pak_object1_ud<1>:ud 0x0:ud {align1}; + /* object 7/8 intra mode */ + mov (1) pak_object7_ud<1>:ud mb_intra_wb.4<0,1,0>:ud {align1}; + mov (1) pak_object8_ud<1>:ud mb_intra_wb.8<0,1,0>:ud {align1}; + /* object 9 Intra structure */ + mov (1) pak_object9_ud<1>:ud 0x0:ud {align1}; + mov (1) pak_object9_ud<1>:ub mb_intra_wb.12<0,1,0>:ub {align1}; + + and (1) pak_object3_ud<1>:ud mb_intra_wb.0<0,1,0>:ud MFC_AVC_INTRA_MASK_DW3:ud {align1}; + add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud MFC_AVC_INTRA_FLAG + MFC_AVC_PAK_CBP:ud {align1}; + + mov (1) tmp_reg0.0<1>:ud 0:ud {align1}; + mov (1) tmp_reg0.1<1>:ub mb_intra_wb.2<0,1,0>:ub {align1}; + and (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw AVC_INTRA_MASK:uw {align1}; + add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud tmp_reg0.0<0,1,0>:ud {align1}; + +/* Write the pak command into the batchbuffer */ +write_pak_command: + mov (8) msg_reg0.0<1>:ud obw_m0.0<8,8,1>:ud {align1} ; + mov (8) msg_reg1.0<1>:ud pak_object_reg0.0<8,8,1>:ud {align1} ; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + MFC_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + add (1) msg_reg0.8<1>:ud msg_reg0.8<0,1,0>:ud 2:ud {align1}; + mov (8) msg_reg1.0<1>:ud pak_object_reg1.0<8,8,1>:ud {align1}; + +/* bind index 3, write 1 oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + 
obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + MFC_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + +/* Check the next mb */ +add (1) cur_loop_count<1>:uw cur_loop_count<0,1,0>:uw 1:uw {align1}; +cmp.e.f0.0 (1) null:uw cur_loop_count<0,1,0>:uw end_loop_count<0,1,0>:uw {align1}; +(f0.0) jmpi (1) pak_loop_end; +/* the buffer offset for next block */ +add (1) obw_m0.8<1>:ud obw_m0.8<0,1,0>:ud 3:uw {align1}; +add (1) mb_cur_msg.8<1>:ud mb_cur_msg.8<0,1,0>:ud vme_len<0,1,0>:ud {align1}; +add (1) cur_mb_x<1>:uw cur_mb_x<0,1,0>:uw 1:uw {align1}; +/* Check whether it is already equal to width in mbs */ +cmp.e.f0.0 (1) null:uw cur_mb_x<0,1,0>:uw width_in_mbs<0,1,0>:uw {align1}; +(f0.0) add (1) cur_mb_y<1>:uw cur_mb_y<0,1,0>:uw 1:uw {align1}; +(f0.0) mov (1) cur_mb_x<1>:uw 0:uw {align1}; + +/* continue the pak command for next mb */ +jmpi (1) pak_object_loop; +nop; +nop; +pak_loop_end: +/* Issue message fence so that the previous write message is committed */ +send (16) + msg_ind + mb_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + MFC_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (1) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + +nop; + diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g75a b/src/shaders/utils/mfc_batchbuffer_hsw.g75a new file mode 100644 index 0000000..4a96754 --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.g75a @@ -0,0 +1,29 @@ +/* + * Copyright © 2010-2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, 
modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + */ + +#include "mfc_batchbuffer_hsw.inc" +#include "mfc_batchbuffer_hsw.asm" + diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g75b b/src/shaders/utils/mfc_batchbuffer_hsw.g75b new file mode 100644 index 0000000..2f42643 --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.g75b @@ -0,0 +1,105 @@ + { 0x00800001, 0x23400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x2ac00229, 0x000000a8, 0x00000000 }, + { 0x00000001, 0x2ac20229, 0x000000a9, 0x00000000 }, + { 0x00000001, 0x2ae00229, 0x000000b0, 0x00000000 }, + { 0x00000001, 0x2ae20229, 0x000000b1, 0x00000000 }, + { 0x00000001, 0x2ae40129, 0x000000ac, 0x00000000 }, + { 0x00000001, 0x2ae80061, 0x00000000, 0x00000002 }, + { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, + { 0x00010001, 0x2ae80061, 0x00000000, 0x00000018 }, + { 0x00000001, 0x21e80021, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x21f40231, 0x00000014, 0x00000000 }, + { 0x00000041, 
0x2b082521, 0x000000aa, 0x00000ac2 }, + { 0x00000040, 0x2b082421, 0x00000b08, 0x00000ac0 }, + { 0x00000041, 0x2b080421, 0x00000b08, 0x00000ae8 }, + { 0x00000001, 0x2b140231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x23400061, 0x00000000, 0x7149000a }, + { 0x00000001, 0x23540061, 0x00000000, 0x000f000f }, + { 0x00000001, 0x23680061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23580231, 0x000000a6, 0x00000000 }, + { 0x00600001, 0x2b400021, 0x008d0b00, 0x00000000 }, + { 0x00000001, 0x23500061, 0x00000000, 0xffff0000 }, + { 0x00000001, 0x21000231, 0x00000ac0, 0x00000000 }, + { 0x00000001, 0x21010231, 0x00000ac2, 0x00000000 }, + { 0x00000001, 0x23500129, 0x00000100, 0x00000000 }, + { 0x00000001, 0x235a0169, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20002528, 0x00000ac0, 0x00000ae0 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x01000010, 0x20002528, 0x00000ac2, 0x00000ae2 }, + { 0x00010001, 0x235a0169, 0x00000000, 0x04000400 }, + { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, + { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02180200 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000240 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280300 }, + { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000001f0 }, + { 0x00000005, 0x234c0c21, 0x00000b80, 0x1f00ffff }, + { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e0000 }, + { 0x00000005, 0x21002d29, 0x00000b80, 0x00030003 }, + { 0x00000001, 0x23440061, 0x00000000, 0x00000020 }, + { 0x01000010, 0x20002d28, 0x00000100, 0x00030003 }, + { 0x00110040, 0x234c0c21, 0x0000034c, 0x00400000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000050 }, + { 0x02000005, 0x20002d20, 0x00000b84, 0xff00ff00 }, + { 0x00010001, 0x23440061, 0x00000000, 0x00000080 }, + { 0x00010040, 0x234c0c21, 0x0000034c, 0x00600000 }, + { 0x00010020, 
0x34001c00, 0x00001400, 0x000000c0 }, + { 0x00000040, 0x234c0c21, 0x0000034c, 0x00400000 }, + { 0x00000005, 0x21002d29, 0x00000b80, 0x00030003 }, + { 0x01000010, 0x20002d28, 0x00000100, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000080 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480400 }, + { 0x00200001, 0x2ba80021, 0x00450bc0, 0x00000000 }, + { 0x00200001, 0x2bb00021, 0x00450be0, 0x00000000 }, + { 0x00200001, 0x2bb80021, 0x00450c00, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0b40, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0ba0, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0200 }, + { 0x00000001, 0x23600021, 0x000000b4, 0x00000000 }, + { 0x00000001, 0x23640021, 0x000000b8, 0x00000000 }, + { 0x00000001, 0x235c0061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x235c0231, 0x00000b85, 0x00000000 }, + { 0x00000001, 0x235d0231, 0x00000b86, 0x00000000 }, + { 0x00000040, 0x21040c21, 0x00000b08, 0x00000003 }, + { 0x00000041, 0x23480c21, 0x00000104, 0x00000010 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000b0 }, + { 0x00200001, 0x23440061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x235c0021, 0x00000b64, 0x00000000 }, + { 0x00000001, 0x23600021, 0x00000b68, 0x00000000 }, + { 0x00000001, 0x23640061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23640231, 0x00000b6c, 0x00000000 }, + { 0x00000005, 0x234c0c21, 0x00000b60, 0x0000c0ff }, + { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e2000 }, + { 0x00000001, 0x21000061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x21010231, 0x00000b62, 0x00000000 }, + { 0x00000005, 0x21002d29, 0x00000100, 0x1f001f00 }, + { 0x00000040, 0x234c0421, 0x0000034c, 0x00000100 }, + { 0x00600001, 0x28000021, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0340, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0202 }, + { 0x00000040, 0x28080c21, 0x00000808, 0x00000002 }, + { 0x00600001, 0x28200021, 0x008d0360, 0x00000000 }, + { 0x0a800031, 
0x20001cac, 0x00000800, 0x040a0002 }, + { 0x00000040, 0x2ac42d29, 0x00000ac4, 0x00010001 }, + { 0x01000010, 0x20002528, 0x00000ac4, 0x00000ae4 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000090 }, + { 0x00000040, 0x21e82c21, 0x000001e8, 0x00030003 }, + { 0x00000040, 0x2b080421, 0x00000b08, 0x00000ae8 }, + { 0x00000040, 0x2ac02d29, 0x00000ac0, 0x00010001 }, + { 0x01000010, 0x20002528, 0x00000ac0, 0x000000aa }, + { 0x00010040, 0x2ac22d29, 0x00000ac2, 0x00010001 }, + { 0x00010001, 0x2ac00169, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xfffffb30 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0a800031, 0x2b601ca1, 0x00000800, 0x0219e002 }, + { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x24001ca8, 0x00000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.inc b/src/shaders/utils/mfc_batchbuffer_hsw.inc new file mode 100644 index 0000000..588006e --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.inc @@ -0,0 +1,195 @@ +/* + * Copyright © 2010-2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Zhao Yakui <yakui.zhao@intel.com> + */ + +/* GRF registers + * r0 header + * r1~r4 constant buffer (reserved) + * r5 inline data + * r6~r7 reserved + * r8~r15 temporary registers + * r16 write back of Oword Block Write + */ + +/* + * GRF 0 -- header + */ +define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */ + +define(`inline_reg0', `r5') +define(`buffer_offset', `inline_reg0.0') /* :ud, in units of Owords */ +/* :ub, + * bit0 indicates the frame type. 1 is the I-frame. 
0 is P-B frame + */ +define(`mb_flag', `inline_reg0.4') +define(`qp_flag', `inline_reg0.6') /* :ub */ + +define(`mb_x', `inline_reg0.8') /* :ub, */ +define(`mb_y', `inline_reg0.9') /* :ub, */ +define(`mb_xy', `inline_reg0.8') /* :uw, */ +/* :uw, the picture width in macroblocks */ +define(`width_in_mbs', `inline_reg0.10') +/* :w, the number of macroblock commands being processed by the kernel */ +define(`total_mbs', `inline_reg0.12') +/* ub, the mb x/y of the last mb in slice */ +define(`slice_end_x', `inline_reg0.16') +define(`slice_end_y', `inline_reg0.17') + +/* :ud the forward reference picture list */ +define(`fwd_ref', `inline_reg0.20') +/* :ud the backward reference picture list */ +define(`bwd_ref', `inline_reg0.24') + +/* + * GRF 8~15 -- temporary registers + */ +define(`tmp_reg0', `r8') +define(`tmp_reg1', `r9') +define(`tmp_reg2', `r10') +define(`tmp_reg3', `r11') +define(`tmp_reg4', `r12') +define(`tmp_reg5', `r13') +define(`tmp_reg6', `r14') +define(`tmp_reg7', `r15') + +define(`obw_m0', `tmp_reg7') + +define(`obw_wb', `null<1>:W') +define(`obw_wb_length', `0') + +/* + * GRF 26~27 + */ +define(`pak_object_reg0', `r26') +define(`pak_object0_ud', `r26.0') +define(`pak_object1_ud', `r26.4') +define(`pak_object2_ud', `r26.8') +define(`pak_object3_ud', `r26.12') +define(`pak_object4_ud', `r26.16') +define(`pak_object5_ud', `r26.20') +define(`pak_object6_ud', `r26.24') +define(`pak_object7_ud', `r26.28') + +define(`pak_object_reg1', `r27') +define(`pak_object8_ud', `r27.0') +define(`pak_object9_ud', `r27.4') +define(`pak_object10_ud', `r27.8') +define(`pak_object11_ud', `r27.12') + +/* + * Message Payload registers + */ +define(`msg_ind', `64') +define(`msg_reg0', `g64') +define(`msg_reg1', `g65') +define(`msg_reg2', `g66') +define(`msg_reg3', `g67') +define(`msg_reg4', `g68') +define(`msg_reg5', `g69') +define(`msg_reg6', `g70') +define(`msg_reg7', `g71') +define(`msg_reg8', `g72') + +define(`MV_BIND_IDX', `0') +define(`MFC_BIND_IDX', `2') + 
+define(`ts_msg_ind', `112') +define(`ts_msg_reg0', `r112') + + +define(`MFC_AVC_PAK_OBJECT_DW0', `0x7149000a') +define(`MFC_AVC_PAK_OBJECT_DW4', `0xFFFF0000') /* CBP for Y */ +define(`MFC_AVC_PAK_OBJECT_DW5', `0x000F000F') +define(`MFC_AVC_PAK_OBJECT_DW10', `0x0000000') + +define(`OBR_MESSAGE_TYPE', `0') +define(`OBR_CACHE_TYPE', `10') + +define(`OBR_MESSAGE_FENCE', `7') +define(`OBR_MF_NOCOMMIT', `0') +define(`OBR_MF_COMMIT', `0x20') + +define(`OBR_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBR_CONTROL_1', `1') /* 1 OWord, high 128 bits */ +define(`OBR_CONTROL_2', `2') /* 2 OWords */ +define(`OBR_CONTROL_4', `3') /* 4 OWords */ +define(`OBR_CONTROL_8', `4') /* 8 OWords */ + +define(`OBR_HEADER_PRESENT', `1') +define(`OBR_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ + +define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ + +define(`OBW_CACHE_TYPE', `10') + + +define(`OBW_MESSAGE_TYPE', `8') + +define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */ +define(`OBW_CONTROL_2', `2') /* 2 OWords */ +define(`OBW_CONTROL_4', `3') /* 4 OWords */ +define(`OBW_CONTROL_8', `4') /* 8 OWords */ +define(`OBW_HEADER_PRESENT', `1') + +define(`INTER_MASK', `0x03') +define(`INTER_16X16MODE', `0x0') +define(`INTER_16X8MODE', `0x01') +define(`INTER_8X16MODE', `0x02') +define(`INTER_8X8MODE', `0x03') +define(`SUBSHAPE_MASK', `0xFF00') + +define(`mb_ind', `90') +define(`mb_msg0', `r90') +define(`mb_wb', `r91') +define(`mb_intra_wb', `r91') +define(`mb_inter_wb', `r92') +define(`mb_mv0', `r93') +define(`mb_mv1', `r94') +define(`mb_mv2', `r95') +define(`mb_mv3', `r96') + +define(`mb_temp', `r86') +define(`cur_mb_x', `mb_temp.0') /* :uw, */ +define(`cur_mb_y', `mb_temp.2') /* :uw, */ +define(`cur_loop_count', `mb_temp.4') /* :uw, */ +define(`mb_end', `r87') +define(`end_mb_x', `mb_end.0') /* :uw, */ +define(`end_mb_y', `mb_end.2') /* :uw, */ +define(`end_loop_count', `mb_end.4') /* :uw, */ 
+/* :ud the length of VME predict result for every mb. Units in owords */ +define(`vme_len', `mb_end.8') +define(`mb_cur_msg', `r88') + +define(`INTRA_SLICE', `0x0001') +define(`MFC_AVC_PAK_LAST_MB', `0x0400') + +define(`MFC_AVC_INTER_MASK_DW3', `0x1F00FFFF') +define(`MFC_AVC_INTRA_MASK_DW3', `0x0000C0FF') +define(`INTER_MV8', `0x00400000') +define(`INTER_MV32', `0x00600000') +define(`MFC_AVC_PAK_CBP', `0x000E0000') +define(`MFC_AVC_INTRA_FLAG', `0x00002000') +define(`AVC_INTRA_MASK', `0x1F00') diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index e3c401d..d89b689 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -1,32 +1,41 @@ VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm -VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb..asm -VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm +VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb.asm mpeg2_inter_ivb.asm +VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm mpeg2_inter_haswell.asm +VME8_CORE = intra_frame_gen8.asm inter_frame_gen8.asm inter_bframe_gen8.asm mpeg2_inter_gen8.asm INTEL_G6B = batchbuffer.g6b intra_frame.g6b inter_frame.g6b INTEL_G6A = batchbuffer.g6a intra_frame.g6a inter_frame.g6a INTEL_GEN6_INC = batchbuffer.inc vme.inc INTEL_GEN6_ASM = $(INTEL_G6A:%.g6a=%.gen6.asm) -INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b mpeg2_inter_frame.g7b intra_frame_ivb.g7b inter_frame_ivb.g7b inter_bframe_ivb.g7b -INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a mpeg2_inter_frame.g7a intra_frame_ivb.g7a inter_frame_ivb.g7a inter_bframe_ivb.g7a +INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b intra_frame_ivb.g7b inter_frame_ivb.g7b inter_bframe_ivb.g7b mpeg2_inter_ivb.g7b +INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a intra_frame_ivb.g7a 
inter_frame_ivb.g7a inter_bframe_ivb.g7a mpeg2_inter_ivb.g7a INTEL_GEN7_INC = batchbuffer.inc vme.inc vme7_mpeg2.inc vme7.inc INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm) -INTEL_G75B = batchbuffer.g75b intra_frame_haswell.g75b inter_frame_haswell.g75b mpeg2_inter_frame_haswell.g75b inter_bframe_haswell.g75b -INTEL_G75A = batchbuffer.g75a intra_frame_haswell.g75a inter_frame_haswell.g75a mpeg2_inter_frame_haswell.g75a inter_bframe_haswell.g75a +INTEL_G75B = batchbuffer.g75b intra_frame_haswell.g75b inter_frame_haswell.g75b inter_bframe_haswell.g75b mpeg2_inter_haswell.g75b +INTEL_G75A = batchbuffer.g75a intra_frame_haswell.g75a inter_frame_haswell.g75a inter_bframe_haswell.g75a mpeg2_inter_haswell.g75a INTEL_GEN75_INC = batchbuffer.inc vme75.inc vme75_mpeg2.inc INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) + +INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_inter_gen8.g8b +INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a inter_bframe_gen8.g8a mpeg2_inter_gen8.g8a +INTEL_GEN8_INC = vme8.inc vme75_mpeg2.inc +INTEL_GEN8_ASM = $(INTEL_G8A:%.g8a=%.gen8.asm) + + TARGETS = if HAVE_GEN4ASM TARGETS += $(INTEL_G6B) TARGETS += $(INTEL_G7B) TARGETS += $(INTEL_G75B) +TARGETS += $(INTEL_G8B) endif all-local: $(TARGETS) -SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm +SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm .g8a .g8b .gen8.asm if HAVE_GEN4ASM $(INTEL_GEN6_ASM): $(VME_CORE) $(INTEL_GEN6_INC) @@ -37,7 +46,7 @@ $(INTEL_GEN6_ASM): $(VME_CORE) $(INTEL_GEN6_INC) .gen6.asm.g6b: $(AM_V_GEN)$(GEN4ASM) -g 6 -o $@ $< -$(INTEL_GEN7_ASM): $(VME_CORE) $(INTEL_GEN7_INC) +$(INTEL_GEN7_ASM): $(VME7_CORE) $(INTEL_GEN7_INC) .g7a.gen7.asm: $(AM_V_GEN)cpp -P -DDEV_IVB $< > _vme0.$@ && \ m4 _vme0.$@ > $@ && \ @@ -53,9 +62,18 @@ $(INTEL_GEN75_ASM): $(VME75_CORE) $(INTEL_GEN75_INC) rm _vme0.$@ .gen75.asm.g75b: $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $< + +$(INTEL_GEN8_ASM): $(VME8_CORE) $(INTEL_GEN8_INC) 
+.g8a.gen8.asm: + $(AM_V_GEN)cpp -P $< > _vme0.$@ && \ + m4 _vme0.$@ > $@ && \ + rm _vme0.$@ +.gen8.asm.g8b: + $(AM_V_GEN)$(GEN4ASM) -g 8 -o $@ $< + endif -CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM) +CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM) $(INTEL_GEN8_ASM) EXTRA_DIST = \ $(INTEL_G6A) \ @@ -64,13 +82,15 @@ EXTRA_DIST = \ $(INTEL_G75B) \ $(INTEL_G7A) \ $(INTEL_G7B) \ - $(INTEL_GEN6_ASM) \ + $(INTEL_G8A) \ + $(INTEL_G8B) \ $(INTEL_GEN6_INC) \ - $(INTEL_GEN75_ASM) \ $(INTEL_GEN75_INC) \ - $(INTEL_GEN7_ASM) \ $(INTEL_GEN7_INC) \ + $(INTEL_GEN8_INC) \ $(VME75_CORE) \ + $(VME7_CORE) \ + $(VME8_CORE) \ $(VME_CORE) \ $(NULL) diff --git a/src/shaders/vme/inter_bframe_gen8.asm b/src/shaders/vme/inter_bframe_gen8.asm new file mode 100644 index 0000000..240dc61 --- /dev/null +++ b/src/shaders/vme/inter_bframe_gen8.asm @@ -0,0 +1,875 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * Authors: Zhao Yakui <yakui.zhao@intel.com> + */ +// Module name: inter_bframe_gen8.asm +// +// Make inter prediction estimation for Inter frame for B-frame +// + +// +// Now, begin source code.... 
+// + +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __START + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; +mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; +mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* + * Media Read Message -- fetch Luma neighbor edge pixels + */ +/* ROW */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; + +/* + * Media Read Message -- fetch Chroma neighbor edge pixels + */ +/* ROW */ +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16 , y * 8 */ +mul (1) 
read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1}; +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16, y * 8 */ +mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1}; +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +mov (8) vme_m1.0<1>:ud 0:ud {align1}; +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +/* read back the data for MB A */ +/* the layout of MB result is: rX.0(Available), rX.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID) +*/ +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV. 
mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.20<1>:w -1:w {align1}; +mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK1:ud {align1}; +SAVE_RET {align1}; +jmpi (1) mb_pred_func; +mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1}; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1}; +(f0.0) mov (1) mba_result.16<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) 
mba_result.20<1>:w 0:w {align1}; +(f0.0) mov (1) mba_result.4<1>:ud mb_mv1.8<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mbb_start; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1}; +(f0.0) mov (1) mba_result.18<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mba_result.22<1>:w 0:w {align1}; +(f0.0) mov (1) mba_result.8<1>:ud mb_mv1.12<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mbb_start; +mov (2) mba_result.4<1>:ud mb_mv1.8<2,2,1>:ud {align1}; +mov (2) mba_result.16<1>:uw MB_PRED_FLAG {align1}; +mov (2) mba_result.20<1>:w 0:w {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV. mba_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbc_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 8 oword 
(128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.20<1>:w -1:w {align1}; +mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK2:ud {align1}; +SAVE_RET {align1}; +jmpi (1) mb_pred_func; +mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1}; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1}; +(f0.0) mov (1) mbb_result.16<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbb_result.20<1>:w 0:w {align1}; +(f0.0) mov (1) mbb_result.4<1>:ud mb_mv2.16<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mbc_start; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1}; +(f0.0) mov (1) mbb_result.18<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbb_result.22<1>:w 0:w {align1}; +(f0.0) mov (1) mbb_result.8<1>:ud mb_mv2.20<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mbc_start; +mov (2) mbb_result.16<1>:uw MB_PRED_FLAG {align1}; +mov (2) mbb_result.20<1>:w 0:w {align1}; +mov (2) mbb_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; + +mbc_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV. 
mba_flag is zero */ +/* Based on h264 spec the MB D will be replaced if MB C doesn't exist */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.20<1>:w -1:w {align1}; +mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK2:ud {align1}; +SAVE_RET {align1}; +jmpi (1) mb_pred_func; +mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1}; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1}; +(f0.0) mov 
(1) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbc_result.20<1>:w 0:w {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mb_mv2.16<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mb_mvp_start; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1}; +(f0.0) mov (1) mbc_result.18<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbc_result.22<1>:w 0:w {align1}; +(f0.0) mov (1) mbc_result.8<1>:ud mb_mv2.20<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (2) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; +mov (2) mbc_result.20<1>:w 0:w {align1}; +mov (2) mbc_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB D */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + 
OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.20<1>:w -1:w {align1}; +mov (1) INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud INTER_BLOCK3:ud {align1}; +SAVE_RET {align1}; +jmpi (1) mb_pred_func; +mov (1) mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1}; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1}; +(f0.0) mov (1) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbc_result.20<1>:w 0:w {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mb_mv3.24<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mb_mvp_start; +cmp.e.f0.0 (1) null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1}; +(f0.0) mov (1) mbc_result.18<1>:uw MB_PRED_FLAG {align1}; +(f0.0) mov (1) mbc_result.22<1>:w 0:w {align1}; +(f0.0) mov (1) mbc_result.8<1>:ud mb_mv3.28<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (2) mbc_result.16<1>:uw MB_PRED_FLAG {align1}; +mov (2) mbc_result.20<1>:w 0:w {align1}; +mov (2) mbc_result.4<1>:ud mb_mv3.24<2,2,1>:ud {align1}; + +mb_mvp_start: +/*TODO: Add the skip prediction */ +/* Check whether both MB B and C are inavailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; +cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; +(-f0.0) jmpi (1) mb_median_start; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; +(f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; +(f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; +(f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; +(f0.0) mov (2) mb_mvp_ref.0<1>:ud mba_result.4<2,2,1>:ud {align1}; +(-f0.0) mov (2) mb_mvp_ref.0<1>:ud 0:ud {align1}; +jmpi (1) __mb_hwdep_end; + 
+mb_median_start: +/* forward_MVP */ +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) mvp_backward; + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; + + +mvp_backward: +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.22<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.8<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.22<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.8<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d 
mbc_result.22<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.8<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.4<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +mov (1) INPUT_ARG0.0<1>:w mba_result.8<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.8<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.8<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.4<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.10<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.10<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.10<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.6<1>:w RET_ARG<0,1,0>:w {align1}; + +__mb_hwdep_end: +asr (4) mb_ref_win.0<1>:w mb_mvp_ref.0<4,4,1>:w 2:w {align1}; +add (4) mb_ref_win.8<1>:w mb_ref_win.0<4,4,1>:w 3:w {align1}; +and (4) mb_ref_win.16<1>:uw mb_ref_win.8<4,4,1>:uw 0xFFFC:uw {align1}; +/* m2, get the MV/Mb cost passed from constant buffer when +spawning thread by MEDIA_OBJECT */ +mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; +/* m3 cost center */ +mov (8) vme_m3.0<1>:ud 0x0:ud {align1}; +mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1}; + +/* m4. 
skip center */ +mov (8) vme_msg_4<1>:ud 0x0:ud {align1}; + +/* m5 */ +mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1}; +and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; +mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1}; +/* Use the Luma mode */ +mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1}; +mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* m6 */ +mov (8) vme_msg_6<1>:UD 0x0:UD {align1}; +mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; +mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1}; +mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1}; + + +/* m7 */ + +mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1}; +mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1}; + +/* + * SIC VME message + */ +/* m1 */ +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1}; +(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Disable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* m0 */ +mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; 
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_DUAL_REFERENCE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW DREF_REGION_SIZE {align1}; +/* Dual Reference Width&Height,32x32 */ + +mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; + +/* Reference = (x-8,y-8)-(x+8,y+8) */ +add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -8:W {align1}; +add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -8:W {align1}; + +mov (1) vme_m0.0<1>:W -8:W {align1}; +mov (1) vme_m0.2<1>:W -8:W {align1}; + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; +(f0.0) add (1) vme_m0.4<1>:w vme_m0.4<0,1,0>:w 4:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; +(f0.0) add (1) vme_m0.6<1>:w vme_m0.6<0,1,0>:w 4:w {align1}; + +add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.20<2,2,1>:w {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number 
is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD DSTART_CENTER + DSEARCH_PATH_LEN:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + + +/* Setup the Cost center */ +/* currently four 8x8 share the same cost center */ +mov (4) vme_m3.0<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; +mov (4) vme_m3.4<2>:ud mb_mvp_ref.4<0,1,0>:ud {align1}; + +/* M4/M5 search path */ + +mov (1) vme_msg_4.0<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; +mov (8) vme_msg_5.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + + /* 16x16 Source, 1/4 pixel, harr, BME ENABLE */ +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_ENABLE:UD {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) tmp_reg0.0<1>:uw BI_WEIGHT {align1}; +mov (1) vme_m1.6<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; +mov (8) vme_msg_3.0<1>:UD 
vme_m3.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; +/* write FME info */ +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; + +mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1}; +/* Inter distortion of FME */ +mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1}; + +mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; + +/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME MV */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; +/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_8, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 5 + rlen obw_wb_length + {align1}; + +/* Write FME/BME RefID */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + 
OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + +/* Issue message fence so that the previous write message is committed */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + + nop ; + nop ; +/* Compare three word data to get the min value */ +word_imin: + cmp.le.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + cmp.le.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + RETURN {align1}; + +/* Compare three word data to get the max value */ +word_imax: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + cmp.ge.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + RETURN {align1}; + +word_imedian: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w 
INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + jmpi (1) cmp_end; +cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; +cmp_end: + RETURN {align1}; + +mb_pred_func: + mov (8) TEMP_VAR0.0<1>:ud 0:ud {align1}; + mov (1) TEMP_VAR0.0<1>:ub INPUT_ARG0.2<0,1,0>:ub {align1}; + and (1) TEMP_VAR0.4<1>:uw INPUT_ARG0.4<0,1,0>:uw INTER_MASK:uw {align1}; + /* INTER16x16 mode. The bit1-0 is the prediction mode */ + cmp.e.f0.0 (1) null:uw TEMP_VAR0.4<0,1,0>:uw INTER_16X16MODE:uw {align1}; + (f0.0) and (1) RET_ARG<1>:uw TEMP_VAR0.0<0,1,0>:uw PRED_MASK {align1}; + (f0.0) jmpi (1) end_mb_pred; + /* Check whether it is INTER8x8 mode. */ + cmp.e.f0.0 (1) null:uw TEMP_VAR0.4<0,1,0>:uw INTER_8X8MODE:uw {align1}; + (f0.0) jmpi (1) mb_pred_func_8; + + /* Check whether it is INTER16x8 mode. */ + cmp.e.f0.0 (1) null:uw TEMP_VAR0.4<0,1,0>:uw INTER_16X8MODE:uw {align1}; + (f0.0) jmpi (1) mb_pred_func_168; +mb_pred_func_816: + /* Block 0/2 uses the bit1-0. Block 1/3 uses the bit3-2 */ + mov (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw {align1}; + and.z.f0.0 (1) null:uw TEMP_VAR0.8<0,1,0>:uw INTER_BLOCK1:uw {align1}; + (f0.0) and (1) RET_ARG<1>:uw TEMP_VAR0.0<0,1,0>:uw PRED_MASK {align1}; + (f0.0) jmpi (1) end_mb_pred; + shr (1) TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw 2:uw {align1}; + and (1) RET_ARG<1>:uw TEMP_VAR0.16<0,1,0>:uw PRED_MASK {align1}; + jmpi (1) end_mb_pred; + +mb_pred_func_168: + /* Block 0/1 uses the bit1-0. 
Block 2/3 uses the bit3-2 */ + mov (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw {align1}; + cmp.l.f0.0 (1) null:uw TEMP_VAR0.8<0,1,0>:uw INTER_BLOCK2:uw {align1}; + (f0.0) and (1) RET_ARG<1>:uw TEMP_VAR0.0<0,1,0>:uw PRED_MASK {align1}; + (f0.0) jmpi (1) end_mb_pred; + shr (1) TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw 2:uw {align1}; + and (1) RET_ARG<1>:uw TEMP_VAR0.16<0,1,0>:uw PRED_MASK {align1}; + jmpi (1) end_mb_pred; + +mb_pred_func_8: + /* 8X8 mode. Every block uses two bits as the prediction mode. */ + mul (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw 2:uw {align1}; + shr (1) TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw TEMP_VAR0.8<0,1,0>:uw {align1}; + and (1) RET_ARG<1>:uw TEMP_VAR0.16<0,1,0>:uw PRED_MASK {align1}; +end_mb_pred: + RETURN {align1}; + diff --git a/src/shaders/vme/inter_bframe_gen8.g8a b/src/shaders/vme/inter_bframe_gen8.g8a new file mode 100644 index 0000000..8aff32e --- /dev/null +++ b/src/shaders/vme/inter_bframe_gen8.g8a @@ -0,0 +1,2 @@ +#include "vme8.inc" +#include "inter_bframe_gen8.asm" diff --git a/src/shaders/vme/inter_bframe_gen8.g8b b/src/shaders/vme/inter_bframe_gen8.g8b new file mode 100644 index 0000000..77daf5a --- /dev/null +++ b/src/shaders/vme/inter_bframe_gen8.g8b @@ -0,0 +1,423 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, + { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, + { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 
0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000018 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, + { 0x00600001, 0x24600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000d60 }, + { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000210 }, + { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 
0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 }, + { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 }, + { 0x00000001, 0x2fa80608, 0x00000000, 0x00000001 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00001490 }, + { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 }, + { 0x00010001, 0x2af01e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2af41e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2ae40208, 0x00000bc8, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 }, + { 0x00010001, 0x2af21e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2af61e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2ae80208, 0x00000bcc, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00200001, 0x2ae40208, 0x00450bc8, 0x00000000 }, + { 0x00200001, 0x2af01e48, 0x18000000, 0x00010001 }, + { 0x00200001, 0x2af41e68, 0x18000000, 0x00000000 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000006c0 }, + { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 
0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 }, + { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 }, + { 0x00000001, 0x2fa80608, 0x00000000, 0x00000002 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00001230 }, + { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 }, + { 0x00010001, 0x2b101e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b141e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b040208, 0x00000bf0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 }, + { 0x00010001, 0x2b121e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b161e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b080208, 0x00000bf4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00200001, 0x2b101e48, 0x18000000, 0x00010001 }, + { 0x00200001, 0x2b141e68, 0x18000000, 0x00000000 }, + { 0x00200001, 0x2b040208, 0x00450bf0, 0x00000000 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000230 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 
0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000003c0 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 }, + { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 }, + { 0x00000001, 0x2fa80608, 0x00000000, 0x00000002 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000fe0 }, + { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 }, + { 0x00010001, 0x2b301e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b341e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b240208, 0x00000bf0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000002e0 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 }, + { 0x00010001, 0x2b321e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b361e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b280208, 0x00000bf4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000290 }, + { 0x00200001, 0x2b301e48, 0x18000000, 0x00010001 }, + { 0x00200001, 0x2b341e68, 0x18000000, 0x00000000 }, + { 0x00200001, 0x2b240208, 0x00450bf0, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000250 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000210 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 
0x00200040, 0x24001a68, 0x1e450400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00000001, 0x2fa00208, 0x00000b84, 0x00000000 }, + { 0x00000001, 0x2fa40208, 0x00000b80, 0x00000000 }, + { 0x00000001, 0x2fa80608, 0x00000000, 0x00000003 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000d80 }, + { 0x00000001, 0x2aa01248, 0x00000fe4, 0x00000000 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00000000 }, + { 0x00010001, 0x2b301e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b341e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b240208, 0x00000c18, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x01000010, 0x20001240, 0x16000aa0, 0x00010001 }, + { 0x00010001, 0x2b321e48, 0x18000000, 0x00010001 }, + { 0x00010001, 0x2b361e68, 0x18000000, 0x00000000 }, + { 0x00010001, 0x2b280208, 0x00000c1c, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00200001, 0x2b301e48, 0x18000000, 0x00010001 }, + { 0x00200001, 0x2b341e68, 0x18000000, 0x00000000 }, + { 0x00200001, 0x2b240208, 0x00450c18, 0x00000000 }, + { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, + { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 }, + { 0x00210001, 0x2b040208, 0x00450ae4, 0x00000000 }, + { 0x00210001, 0x2b240208, 0x00450ae4, 0x00000000 }, + { 
0x00210001, 0x2b141248, 0x00450af4, 0x00000000 }, + { 0x00210001, 0x2b341248, 0x00450af4, 0x00000000 }, + { 0x00210001, 0x2ac00208, 0x00450ae4, 0x00000000 }, + { 0x00310001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000320 }, + { 0x00600001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000009d0 }, + { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000970 }, + { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, + { 0x00600001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000af6, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000ae8, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b16, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b08, 0x00000000 }, + { 
0x01000010, 0x20001a20, 0x1e000b36, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b28, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x2ac40208, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000001, 0x2fa01a68, 0x00000ae8, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b08, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b28, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000840 }, + { 0x00000001, 0x2ac41a68, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa01a68, 0x00000aea, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b0a, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b2a, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000007e0 }, + { 0x00000001, 0x2ac61a68, 0x00000fe4, 0x00000000 }, + { 0x0040000c, 0x2a801a68, 0x1e690ac0, 0x00020002 }, + { 0x00400040, 0x2a881a68, 0x1e690a80, 0x00030003 }, + { 0x00400005, 0x2a901248, 0x16690a88, 0xfffcfffc }, + { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x25800608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, + { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, + { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00010001 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 }, + { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, + { 
0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00200700 }, + { 0x00000001, 0x24561648, 0x10000000, 0x20202020 }, + { 0x00000001, 0x24400208, 0x00000448, 0x00000000 }, + { 0x00000040, 0x24401a68, 0x1e000440, 0xfff8fff8 }, + { 0x00000040, 0x24421a68, 0x1e000442, 0xfff8fff8 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff8fff8 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff8fff8 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00010040, 0x24401a68, 0x1e000440, 0x00040004 }, + { 0x00010040, 0x24441a68, 0x1e000444, 0x00040004 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00010040, 0x24421a68, 0x1e000442, 0x00040004 }, + { 0x00010040, 0x24461a68, 0x1e000446, 0x00040004 }, + { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 }, + { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a94 }, + { 
0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680608, 0x00000000, 0x00001212 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00400001, 0x45800208, 0x00000ac0, 0x00000000 }, + { 0x00400001, 0x45840208, 0x00000ac4, 0x00000000 }, + { 0x00000001, 0x28800608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28880608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x288c0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28900608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28b00608, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 }, + { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24001248, 0x16000180, 0x00030003 }, + { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00203000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x24662288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000188, 0x00000000 }, + { 
0x00000001, 0x282c0208, 0x00000574, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000001 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x0a0a0403 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000008 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 }, + { 
0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x00600001, 0x2f600608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x2f602288, 0x00000fa2, 0x00000000 }, + { 0x00000005, 0x2f641248, 0x16000fa4, 0x00030003 }, + { 0x01000010, 0x20001240, 0x16000f64, 0x00000000 }, + { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000150 }, + { 0x01000010, 0x20001240, 0x16000f64, 0x00030003 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000100 }, + { 0x01000010, 0x20001240, 0x16000f64, 0x00010001 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x00000001, 0x2f681248, 0x00000fa8, 0x00000000 }, + { 0x01000005, 0x20001240, 0x16000f68, 0x00010001 }, + { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000d0 }, + { 0x00000008, 0x2f701248, 0x16000f60, 0x00020002 }, + { 0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000000a0 }, + { 0x00000001, 0x2f681248, 0x00000fa8, 0x00000000 }, + { 0x05000010, 0x20001240, 0x16000f68, 0x00020002 }, + { 0x00010005, 0x2fe41248, 0x16000f60, 0x00030003 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x00000008, 0x2f701248, 0x16000f60, 0x00020002 }, + { 0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00000041, 0x2f681248, 0x16000fa8, 0x00020002 }, + { 0x00000008, 0x2f701248, 0x12000f60, 0x00000f68 }, + { 
0x00000005, 0x2fe41248, 0x16000f70, 0x00030003 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, diff --git a/src/shaders/vme/inter_bframe_haswell.asm b/src/shaders/vme/inter_bframe_haswell.asm index 9e54b9d..f8ff0af 100644 --- a/src/shaders/vme/inter_bframe_haswell.asm +++ b/src/shaders/vme/inter_bframe_haswell.asm @@ -396,7 +396,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; diff --git a/src/shaders/vme/inter_bframe_haswell.g75b b/src/shaders/vme/inter_bframe_haswell.g75b index 03da639..cabef20 100644 --- a/src/shaders/vme/inter_bframe_haswell.g75b +++ b/src/shaders/vme/inter_bframe_haswell.g75b @@ -186,7 +186,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000080 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00210001, 0x2b040021, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b240021, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b140129, 0x00450af4, 0x00000000 }, diff --git a/src/shaders/vme/inter_bframe_ivb.asm b/src/shaders/vme/inter_bframe_ivb.asm index 577895c..8a75962 100644 --- a/src/shaders/vme/inter_bframe_ivb.asm +++ b/src/shaders/vme/inter_bframe_ivb.asm @@ -388,7 +388,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) 
null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (2) mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1}; (f0.0) mov (2) mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1}; diff --git a/src/shaders/vme/inter_bframe_ivb.g7b b/src/shaders/vme/inter_bframe_ivb.g7b index fe6f98d..adcb390 100644 --- a/src/shaders/vme/inter_bframe_ivb.g7b +++ b/src/shaders/vme/inter_bframe_ivb.g7b @@ -180,7 +180,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00210001, 0x2b040021, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b240021, 0x00450ae4, 0x00000000 }, { 0x00210001, 0x2b140129, 0x00450af4, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame.asm b/src/shaders/vme/inter_frame.asm index e1b6e68..7c5cfd4 100644 --- a/src/shaders/vme/inter_frame.asm +++ b/src/shaders/vme/inter_frame.asm @@ -35,7 +35,11 @@ mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* Source = (x, y) * 16 */ - + +cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1}; +(f0.0) jmpi (1) __low_quality_search; + +__high_quality_search: #ifdef DEV_SNB shl (2) vme_m0.0<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+24) */ @@ -47,8 +51,25 @@ mov (1) vme_m0.2<1>:W -12:W {align1} ; mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, harr */ mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ -mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ +mov (1) 
vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ +jmpi __vme_msg1; + + +__low_quality_search: +#ifdef DEV_SNB +shl (2) vme_m0.0<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; +add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -8:W {align1}; +add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -8:W {align1}; +#else +mov (1) vme_m0.0<1>:W -8:W {align1} ; +mov (1) vme_m0.2<1>:W -8:W {align1} ; +#endif + +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1}; /* 16x16 Source, 1/2 pixel, harr */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ +__vme_msg1: mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; mov (1) vme_m1.4<1>:UD FB_PRUNING_ENABLE:UD {align1}; /* MV num is passed by constant buffer. R4.28 */ diff --git a/src/shaders/vme/inter_frame.g6b b/src/shaders/vme/inter_frame.g6b index ca251bb..bc7cd43 100644 --- a/src/shaders/vme/inter_frame.g6b +++ b/src/shaders/vme/inter_frame.g6b @@ -11,12 +11,21 @@ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, { 0x00000001, 0x24340231, 0x00000014, 0x00000000 }, { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x01000010, 0x20002e28, 0x000000a8, 0x00020002 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000000e }, { 0x00200009, 0x24402e29, 0x004500a0, 0x00040004 }, { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 }, { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 }, { 0x00000001, 0x244c0061, 0x00000000, 0x00203000 }, { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000000c }, + { 0x00200009, 0x24402e29, 0x004500a0, 0x00040004 }, + { 0x00000040, 0x24403dad, 0x00000440, 0xfff8fff8 }, + { 0x00000040, 0x24423dad, 0x00000442, 0xfff8fff8 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00201000 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 
0x00000001, 0x24560169, 0x00000000, 0x20202020 }, { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, { 0x00000001, 0x24640061, 0x00000000, 0x40000000 }, { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame.g7b b/src/shaders/vme/inter_frame.g7b index 5273200..2a34927 100644 --- a/src/shaders/vme/inter_frame.g7b +++ b/src/shaders/vme/inter_frame.g7b @@ -11,11 +11,19 @@ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, { 0x00000001, 0x24340231, 0x00000014, 0x00000000 }, { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x01000010, 0x20002e28, 0x000000a8, 0x00020002 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000000c }, { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, { 0x00000001, 0x244c0061, 0x00000000, 0x00203000 }, { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000000a }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00201000 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x20202020 }, { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, { 0x00000001, 0x24640061, 0x00000000, 0x40000000 }, { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_gen8.asm b/src/shaders/vme/inter_frame_gen8.asm new file mode 100644 index 0000000..991d903 --- /dev/null +++ b/src/shaders/vme/inter_frame_gen8.asm @@ -0,0 +1,760 @@ +/* + * Copyright © <2013>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. 
+ * + */ +// Modual name: Inter_frame_gen8.asm +// +// Make inter predition estimation for Inter-frame on gen8 +// + +// +// Now, begin source code.... +// + +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __START + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; +mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; +mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* + * Media Read Message -- fetch Luma neighbor edge pixels + */ +/* ROW */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; + +/* + * Media Read Message -- 
fetch Chroma neighbor edge pixels + */ +/* ROW */ +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16 , y * 8 */ +mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1}; +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16, y * 8 */ +mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1}; +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; +/* read back the data for MB A */ +/* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backwared reference ID) +*/ +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV. 
mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.4<1>:ud mb_mv1.8<2,2,1>:ud {align1}; +mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV. 
mba_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbc_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; +mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; + +mbc_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV. 
mba_flag is zero */ +/* Based on h264 spec the MB D will be replaced if MB C doesn't exist */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; 
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB D */ +/* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_8, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 4 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv3.24<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +mb_mvp_start: +/*TODO: Add the skip prediction */ +/* Check whether both MB B and C are inavailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; +cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; +(-f0.0) jmpi (1) mb_median_start; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; +(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mbc_result.20<1>:uw 
mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; +(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; +jmpi (1) __mb_hwdep_end; + +mb_median_start: +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; + +__mb_hwdep_end: +asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; +add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; +and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; +/* m2, get the MV/Mb cost passed from constant buffer when +spawning thread by MEDIA_OBJECT */ +mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD 
vme_m2.0<8,8,1>:UD {align1}; + +/* m3 FWD/BWD cost center*/ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* m4 skip center*/ +mov (8) vme_msg_4<1>:UD 0x0:UD {align1}; + +/* m5 */ +mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1}; +and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; +mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1}; + + +/* Use the Luma mode */ +mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1}; +mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* m6 */ +mov (8) vme_msg_6<1>:UD 0x0:UD {align1}; +mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; +mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1}; +mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1}; + + +/* m7 */ + +mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1}; +mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1}; + +/* + * SIC VME message + */ + +/* m1 */ +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1}; +(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Disable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* m0 */ +mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD 
{align1}; + +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* IME search */ +cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1}; +(f0.0) jmpi (1) __low_quality_search; + +__high_quality_search: +/* M3/M4 search path */ +mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; + +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; + +and.z.f0.0 (1) null:uw 
input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; +jmpi (1) __vme_msg; + +__low_quality_search: +/* M3/M4 search path */ +mov (1) vme_msg_3.0<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x000F0F0F:UD {align1}; +mov (4) vme_msg_3.16<1>:UD 0x0:UD {align1}; +mov (8) vme_msg_4.16<1>:UD 0x0:UD {align1}; + +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ +mov (1) vme_m0.0<1>:W -8:W {align1}; +mov (1) vme_m0.2<1>:W -8:W {align1}; + +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; + +__vme_msg: +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; +add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* Setup the Cost center */ +/* currently four 8x8 share the same cost center */ +mov (4) vme_m3.0<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; +mov (4) vme_m3.4<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; + 
+mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1}; +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* M4/M5 search path */ +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/4 pixel, harr, BME disable */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; +mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + 
vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; +/* write FME info */ +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; + +mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1}; +/* Inter distortion of FME */ +mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1}; + +mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; + +/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME MV */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; +/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_8, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 5 + rlen obw_wb_length + {align1}; + +/* Write FME/BME RefID */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + 
+/* Issue message fence so that the previous write message is committed */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + + nop ; + nop ; +/* Compare three word data to get the min value */ +word_imin: + cmp.le.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + cmp.le.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + RETURN {align1}; + +/* Compare three word data to get the max value */ +word_imax: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + cmp.ge.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + RETURN {align1}; + +word_imedian: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + jmpi (1) cmp_end; 
+cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; +cmp_end: + RETURN {align1}; + diff --git a/src/shaders/vme/inter_frame_gen8.g8a b/src/shaders/vme/inter_frame_gen8.g8a new file mode 100644 index 0000000..f514dd3 --- /dev/null +++ b/src/shaders/vme/inter_frame_gen8.g8a @@ -0,0 +1,2 @@ +#include "vme8.inc" +#include "inter_frame_gen8.asm" diff --git a/src/shaders/vme/inter_frame_gen8.g8b b/src/shaders/vme/inter_frame_gen8.g8b new file mode 100644 index 0000000..d0cc25d --- /dev/null +++ b/src/shaders/vme/inter_frame_gen8.g8b @@ -0,0 +1,327 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, + { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, + { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000018 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x23800a88, 
0x0e000800, 0x02190004 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, + { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000750 }, + { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 
0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2ae40208, 0x00450bc8, 0x00000000 }, + { 0x00000001, 0x2af01e68, 0x18000000, 0x00010001 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000360 }, + { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b040208, 0x00450bf0, 0x00000000 }, + { 0x00000001, 0x2b101e68, 0x18000000, 0x00010001 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000110 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 
0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000180 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b240208, 0x00450bf0, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000130 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02480403 }, + { 0x00200001, 0x2b240208, 0x00450c18, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, + { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 }, + { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2b341248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2ac00208, 0x00000ae4, 0x00000000 }, + { 0x00110001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000190 }, + { 0x00600001, 0x24000608, 
0x00000000, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000a20 }, + { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000009c0 }, + { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, + { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 }, + { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 }, + { 0x00200005, 0x2a901248, 0x16450a88, 0xfffcfffc }, + { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, + { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, + { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00010001 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 
0x00000000, 0x00000000 }, + { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 }, + { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, + { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x01000010, 0x20002240, 0x160000a7, 0x00020002 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000160 }, + { 0x00000001, 0x28600608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28640608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28680608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x286c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28700608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28740608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28780608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x287c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28800608, 
0x00000000, 0x01010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28900608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00200000 }, + { 0x00000001, 0x24561648, 0x10000000, 0x28302830 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00010040, 0x24401a68, 0x1e000440, 0x000c000c }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00010040, 0x24421a68, 0x1e000442, 0x00080008 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000000e0 }, + { 0x00000001, 0x28600608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28640608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28680608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x286c0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28700608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28900608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00200000 }, + { 0x00000001, 0x24561648, 0x10000000, 0x20202020 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff8fff8 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff8fff8 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + { 0x00010040, 0x24401a68, 0x1e000440, 0x00040004 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00010040, 0x24421a68, 0x1e000442, 0x00040004 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, + { 0x00200040, 0x24401a68, 0x1a450440, 0x00450a90 }, + { 0x00200040, 0x24441a68, 0x1a450444, 0x00450a90 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680608, 0x00000000, 0x30003030 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00400001, 0x45800208, 0x00000ac0, 0x00000000 }, + { 0x00400001, 0x45840208, 
0x00000ac0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28900608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28940608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 }, + { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24001248, 0x16000180, 0x00030003 }, + { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00243000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x282c0208, 
0x00000574, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000001 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x0a0a0403 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000008 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x06000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010001, 0x2f601a68, 0x00000fa0, 0x00000000 }, + { 0x00110001, 0x2f601a68, 0x00000fa4, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000f60, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000f60, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 }, + { 0x04000010, 0x20001a60, 
0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm index 6305c3c..399125a 100644 --- a/src/shaders/vme/inter_frame_haswell.asm +++ b/src/shaders/vme/inter_frame_haswell.asm @@ -329,7 +329,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; @@ -475,24 +475,58 @@ send (16) {align1}; /* IME search */ -mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ -mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ - -mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; +cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1}; +(f0.0) jmpi (1) __low_quality_search; -add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+28) */ -add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -12:W {align1}; +__high_quality_search: +/* M3/M4 search path */ +mov (1) vme_msg_3.0<1>:UD 0x01010101:UD 
{align1}; +mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ mov (1) vme_m0.0<1>:W -16:W {align1}; mov (1) vme_m0.2<1>:W -12:W {align1}; -mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; - and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; (f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 12:w {align1}; and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; (f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; - + +jmpi (1) __vme_msg; + +__low_quality_search: +/* M3/M4 search path */ +mov (1) vme_msg_3.0<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x000F0F0F:UD {align1}; +mov (4) vme_msg_3.16<1>:UD 0x0:UD {align1}; +mov (8) vme_msg_4.16<1>:UD 0x0:UD {align1}; + +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ +mov (1) vme_m0.0<1>:W -8:W {align1}; +mov (1) vme_m0.2<1>:W -8:W {align1}; + +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw 
{align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; + +__vme_msg: +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; @@ -507,23 +541,6 @@ mov (1) vme_m1.20<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; -/* M3/M4 search path */ - -mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; -mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; -mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; -mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; -mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; -mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; -mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; -mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; - -mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; -mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; -mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; -mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; - -mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; send (8) vme_msg_ind diff --git a/src/shaders/vme/inter_frame_haswell.g75b b/src/shaders/vme/inter_frame_haswell.g75b index d9d791d..1a60c51 100644 --- a/src/shaders/vme/inter_frame_haswell.g75b +++ b/src/shaders/vme/inter_frame_haswell.g75b @@ -120,7 +120,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000080 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 
}, { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, @@ -145,13 +145,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000850 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000930 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000007f0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000008d0 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, @@ -192,18 +192,45 @@ { 0x00000001, 0x28380021, 0x0000019c, 0x00000000 }, { 0x00000001, 0x283c0021, 0x00000488, 0x00000000 }, { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, + { 0x01000010, 0x20002e28, 0x000000a7, 0x00020002 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000160 }, + { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28640061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28700061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28740061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28800061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28900061, 0x00000000, 0x00000000 }, { 0x00000001, 0x244c0061, 0x00000000, 0x00200000 }, { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, - { 0x00000001, 0x24400021, 0x00000448, 
0x00000000 }, - { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 }, - { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 }, { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, - { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, { 0x00010040, 0x24403dad, 0x00000440, 0x000c000c }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, { 0x00010040, 0x24423dad, 0x00000442, 0x00080008 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000e0 }, + { 0x00000001, 0x28600061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28640061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28680061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x286c0061, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28700061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28900061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00200000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x20202020 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, + { 0x00010040, 0x24403dad, 0x00000440, 0x00040004 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, + { 0x00010040, 0x24423dad, 0x00000442, 0x00040004 }, + { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 }, { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, @@ -214,19 +241,6 @@ { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, - { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28640061, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f }, - { 0x00000001, 0x28700061, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28740061, 0x00000000, 
0x10010101 }, - { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f }, - { 0x00000001, 0x28800061, 0x00000000, 0x01010101 }, - { 0x00000001, 0x28840061, 0x00000000, 0x10010101 }, - { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f }, - { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f }, - { 0x00400001, 0x28900061, 0x00000000, 0x00000000 }, { 0x08600031, 0x21801ca1, 0x00000800, 0x0a784000 }, { 0x00000001, 0x25740061, 0x00000000, 0x00000000 }, { 0x00000001, 0x25750231, 0x00000199, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_ivb.asm b/src/shaders/vme/inter_frame_ivb.asm index b5cafdd..46f2b4b 100644 --- a/src/shaders/vme/inter_frame_ivb.asm +++ b/src/shaders/vme/inter_frame_ivb.asm @@ -323,7 +323,7 @@ mb_mvp_start: add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; (-f0.0) jmpi (1) mb_median_start; -cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 1:d {align1}; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; (f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; (f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; @@ -391,12 +391,14 @@ mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; /* M0 */ /* IME search */ +cmp.z.f0.0 (1) null<1>:uw quality_level_ub<0,1,0>:ub LOW_QUALITY_LEVEL:uw {align1}; +(f0.0) jmpi (1) __low_quality_search; + +__high_quality_search: mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, harr */ mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ -mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1}; - mov (1) vme_m0.0<1>:W -16:W {align1}; mov (1) vme_m0.2<1>:W -12:W {align1}; @@ -405,6 +407,22 @@ and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub 
INTRA_PRED_AVAIL_FLAG_AE:uw and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; (f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 8:w {align1}; +jmpi __vme_msg; + +__low_quality_search: +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1}; +/* 16x16 Source, 1/2 pixel, harr */ +mov (1) vme_m0.22<1>:UW MIN_REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ + +mov (1) vme_m0.0<1>:W -8:W {align1}; +mov (1) vme_m0.2<1>:W -8:W {align1}; + +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +(f0.0) add (1) vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +(f0.0) add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1}; + +__vme_msg: mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; diff --git a/src/shaders/vme/inter_frame_ivb.g7b b/src/shaders/vme/inter_frame_ivb.g7b index 1bb41b2..7ed38c5 100644 --- a/src/shaders/vme/inter_frame_ivb.g7b +++ b/src/shaders/vme/inter_frame_ivb.g7b @@ -116,7 +116,7 @@ { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, @@ -141,13 +141,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000bc }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000d0 }, { 0x00000001, 
0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000b0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000c4 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, @@ -163,15 +163,25 @@ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, { 0x00010001, 0x247c0171, 0x00000000, 0x00020002 }, { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 }, + { 0x01000010, 0x20002e28, 0x000000a7, 0x00020002 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000012 }, { 0x00000001, 0x244c0061, 0x00000000, 0x00203000 }, { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, - { 0x00000001, 0x24400021, 0x00000448, 0x00000000 }, { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, { 0x00010040, 0x24403dad, 0x00000440, 0x000c000c }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, { 0x00010040, 0x24423dad, 0x00000442, 0x00080008 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00201000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x20202020 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff8fff8 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff8fff8 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, + { 0x00010040, 0x24403dad, 0x00000440, 0x00040004 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, + { 0x00010040, 0x24423dad, 0x00000442, 0x00040004 }, { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 }, diff --git a/src/shaders/vme/intra_frame_gen8.asm b/src/shaders/vme/intra_frame_gen8.asm 
new file mode 100644 index 0000000..682d146 --- /dev/null +++ b/src/shaders/vme/intra_frame_gen8.asm @@ -0,0 +1,185 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * + */ +// Modual name: IntraFrame_gen8.asm +// +// Make intra predition estimation for Intra frame on Gen8 +// + +// +// Now, begin source code.... +// + +/* + * __START + */ +__INTRA_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; +mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; +mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* + * Media Read Message -- fetch Luma neighbor edge pixels + */ +/* ROW */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind 
INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; + +/* + * Media Read Message -- fetch Chroma neighbor edge pixels + */ +/* ROW */ +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16 , y * 8 */ +mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1}; +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16, y * 8 */ +mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1}; +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* m2, get the MV/Mb cost passed by constant buffer +when creating EU thread by MEDIA_OBJECT */ +mov (8) vme_msg_2<1>:UD r1.0<8,8,1>:UD {align1}; + +/* m3. 
This is changed for FWD/BWD cost center */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* m4.*/ +mov (8) vme_msg_4<1>:ud 0x0:ud {align1}; + +/* m5 */ +mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1}; +and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; +mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1}; + +mov (1) tmp_reg0.0<1>:UW LUMA_CHROMA_MODE:UW {align1}; +/* Use the Luma mode */ +mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* m6 */ +mov (8) vme_msg_6<1>:UD 0x0:UD {align1}; +mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; +mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1}; +mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1}; + + +/* m7 */ + +mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1}; +mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1}; + +/* + * VME message + */ + +/* m1 */ +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1}; +(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Disable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; +/* m0 */ +/* 16x16 Source, Intra_harr */ +add (1) vme_m0.12<1>:UD vme_m0.12<0,1,0>:ud INTRA_SAD_HAAR:UD {align1}; +mov (8) vme_msg_0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (1) 
msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; diff --git a/src/shaders/vme/intra_frame_gen8.g8a b/src/shaders/vme/intra_frame_gen8.g8a new file mode 100644 index 0000000..859c72c --- /dev/null +++ b/src/shaders/vme/intra_frame_gen8.g8a @@ -0,0 +1,2 @@ +#include "vme8.inc" +#include "intra_frame_gen8.asm" diff --git a/src/shaders/vme/intra_frame_gen8.g8b b/src/shaders/vme/intra_frame_gen8.g8b new file mode 100644 index 0000000..56c7283 --- /dev/null +++ b/src/shaders/vme/intra_frame_gen8.g8b @@ -0,0 +1,72 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, + { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 }, + { 0x00000040, 
0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, + { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, + { 0x00600001, 0x28400208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, + { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, + { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00000000 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00000001, 
0x28d41248, 0x00000606, 0x00000000 }, + { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, + { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, diff --git a/src/shaders/vme/mpeg2_inter_frame.g7a b/src/shaders/vme/mpeg2_inter_frame.g7a deleted file mode 100644 index 937ea9b..0000000 --- a/src/shaders/vme/mpeg2_inter_frame.g7a +++ /dev/null @@ -1,3 +0,0 @@ -#include "vme.inc" -#include "vme7_mpeg2.inc" -#include "inter_frame.asm" diff --git a/src/shaders/vme/mpeg2_inter_frame.g7b b/src/shaders/vme/mpeg2_inter_frame.g7b deleted file mode 100644 index 40aeb3f..0000000 --- a/src/shaders/vme/mpeg2_inter_frame.g7b +++ /dev/null @@ -1,105 +0,0 @@ - { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, - { 0x00800001, 0x24600061, 0x00000000, 0x00000000 }, 
- { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, - { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, - { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, - { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, - { 0x00000001, 0x24340231, 0x00000014, 0x00000000 }, - { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, - { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, - { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, - { 0x00000001, 0x244c0061, 0x00000000, 0x7e203000 }, - { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, - { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, - { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, - { 0x00000001, 0x24640061, 0x00000000, 0x40000000 }, - { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, - { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, - { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, - { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, - { 0x00000041, 0x24880c21, 0x00000488, 0x0000000a }, - { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, - { 0x04600031, 0x22401cb1, 0x00000800, 0x02190004 }, - { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 }, - { 0x04600031, 0x22801cb1, 0x00000800, 0x02290004 }, - { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, - { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, - { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 }, - { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 }, - { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 }, - { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 }, - { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 }, - { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 }, - { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 }, - 
{ 0x00000040, 0x25202e2d, 0x000000a0, 0x00010001 }, - { 0x00000040, 0x2520352d, 0x000000a2, 0x00004520 }, - { 0x02000041, 0x200045a0, 0x00000520, 0x000000a1 }, - { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 }, - { 0x02000005, 0x20002e28, 0x000000a4, 0x00020002 }, - { 0x00010005, 0x247d1e31, 0x0000047d, 0x000000e0 }, - { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, - { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, - { 0x00000001, 0x22400061, 0x00000000, 0x00000000 }, - { 0x00000005, 0x22440c21, 0x00000244, 0xff000000 }, - { 0x00600001, 0x28600021, 0x008d0240, 0x00000000 }, - { 0x00600001, 0x288000e1, 0x00000000, 0x00000000 }, - { 0x00800001, 0x28800231, 0x00cf0283, 0x00000000 }, - { 0x00000001, 0x28900061, 0x00000000, 0x11111111 }, - { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 }, - { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, - { 0x00600001, 0x24a00021, 0x008d01a0, 0x00000000 }, - { 0x00600001, 0x24c00021, 0x008d01c0, 0x00000000 }, - { 0x00600001, 0x24e00021, 0x008d01e0, 0x00000000 }, - { 0x00600001, 0x25000021, 0x008d0200, 0x00000000 }, - { 0x00600001, 0x28200021, 0x008d04a0, 0x00000000 }, - { 0x00600001, 0x28400021, 0x008d04c0, 0x00000000 }, - { 0x00600001, 0x28600021, 0x008d04e0, 0x00000000 }, - { 0x00600001, 0x28800021, 0x008d0500, 0x00000000 }, - { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 }, - { 0x00000040, 0x28080c21, 0x00000488, 0x00000008 }, - { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, - { 0x00110020, 0x34001c00, 0x00001400, 0x0000001c }, - { 0x00000001, 0x25420169, 0x00000000, 0x00000000 }, - { 0x00000001, 0x25440061, 0x00000000, 0x00000000 }, - { 0x00010005, 0x25422d29, 0x00000182, 0x00200020 }, - { 0x00010008, 0x25422d29, 0x00200542, 0x00050005 }, - { 0x00010041, 0x25442d21, 0x00000542, 0x00600060 }, - { 0x00010040, 0x25442c21, 0x00000544, 0x00200020 }, - { 0x00010009, 0x25422d29, 0x00000542, 0x00050005 }, - { 0x00010040, 0x25422d29, 0x00000542, 0x00400040 }, - { 0x00000040, 0x25422d29, 0x00000542, 0x000e000e }, - { 
0x00000001, 0x28200129, 0x00000180, 0x00000000 }, - { 0x00000001, 0x28220129, 0x00000542, 0x00000000 }, - { 0x00000001, 0x28240021, 0x0000019c, 0x00000000 }, - { 0x00000001, 0x28280021, 0x00000544, 0x00000000 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000008 }, - { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, - { 0x00000001, 0x28240021, 0x00000190, 0x00000000 }, - { 0x00000001, 0x28280021, 0x00000194, 0x00000000 }, - { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 }, - { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 }, - { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 }, - { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 }, - { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 }, - { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 }, - { 0x00010001, 0x24480169, 0x00000000, 0x00000000 }, - { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 }, - { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, - { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, - { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, - { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24882c21, 0x00000488, 0x000a000a }, - { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff }, - { 0x00110020, 0x34001c00, 0x02001400, 0xffffff66 }, - { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 }, - { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 }, diff --git a/src/shaders/vme/mpeg2_inter_gen8.asm b/src/shaders/vme/mpeg2_inter_gen8.asm new file mode 100644 index 0000000..6dd8599 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_gen8.asm @@ -0,0 +1,868 @@ +/* + * Copyright © <2013>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. 
+ * + */ +// Modual name: mpeg2_inter_gen8.asm +// +// Make inter predition estimation for MPEG2 Inter-frame on gen8 +// + +// +// Now, begin source code.... +// + +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __START + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + + +shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; +mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; + +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; +/* read back the data for MB A */ +/* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backwared reference ID) +*/ +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV. 
mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 2 oword (32 bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV. 
mba_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbc_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; + +mbc_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV. 
mba_flag is zero */ +/* Based on h264 spec the MB D will be replaced if MB C doesn't exist */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; 
+mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; /* widen the two orig_xy bytes into words at tmp_reg0.0 / tmp_reg0.2 */ +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; /* MB D: both coordinates minus one */ +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; /* row * picture width in MBs */ +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; /* plus column => linear MB index */ +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* 24 units per MB record; presumably owords (TODO confirm) */ +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* f0.0 = (mb_intra_wb.16 < mb_inter_wb.8); presumably "intra cost beat inter cost" for MB D (TODO confirm field layout) */ +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* in that case MB D contributes no MV */ +(f0.0) jmpi (1) mb_mvp_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the read offset by 3 within MB D's record before fetching the MV */ +/* Read MV for MB D */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +/* NOTE(review): the destination below is typed :ub, while the read above types its destination :ud - confirm whether the difference is intentional */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; /* MB D's MV is stored in the mbc_result slot */ +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +mb_mvp_start: +/* TODO: Add the skip prediction */ +/* Check whether both MB B and C are unavailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; /* sum of the two availability dwords */ +cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; /* zero sum => neither was set to MB_AVAIL (assumes both start at 0 - TODO confirm) */ +(-f0.0) jmpi (1) mb_median_start; /* at least one of B / C is available */ +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; /* only MB A can contribute: is it available? */ +(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* if so, B inherits A's MV ... */ +(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* ... and so does C ... */ +(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; /* ... together with the word at offset 20 */ +(f0.0) mov (1) mbc_result.20<1>:uw
mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; /* predictor = MB A's MV when MB A is available */ +(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* otherwise a zero predictor */ +jmpi (1) __mb_hwdep_end; + +mb_median_start: +/* Check whether exactly one neighbour MB has the same ref ID as the current MB (word at offset 20 equal to 0); tmp_reg0.0 counts the matches and tmp_reg0.4 keeps the MV of the latest match */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<0,1,0>:w 0:w {align1}; /* MB A */ +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<0,1,0>:w 0:w {align1}; /* MB B */ +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<0,1,0>:w 0:w {align1}; /* MB C (or its MB D substitute) */ +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<0,1,0>:w 1:w {align1}; /* exactly one match? */ +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; /* then that neighbour's MV is the predictor */ +(f0.0) jmpi (1) __mb_hwdep_end; /* otherwise fall through to the per-component median of A, B, C */ + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; /* first MV word of A, B, C as the three median inputs */ +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; /* median of the first MV words */ +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; /* second MV word of A, B, C */ +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; /* median of the second MV words */ + +__mb_hwdep_end: + +mov (2) mv_cc_ref.0<1>:w mba_result.4<2,2,1>:w {align1}; /* NOTE(review): the cost centre is loaded from MB A's MV, not from mb_mvp_ref computed above - confirm this is intended */ + +/* Calibrate the ref window for MPEG2: start from a window offset of (-16, -12) and let ref_boundary_check clamp it */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; + +mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1}; /* arg: requested window offset */ +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; /* arg: dword 2 of vme_m0 (presumably the source MB position - TODO confirm) */ +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; /* arg: the whole pic_ref register */ + +SAVE_RET
{align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; /* adopt the two words returned by ref_boundary_check as the window offset */ + +/* m2, get the MV/Mb cost passed from constant buffer when +spawning thread by MEDIA_OBJECT */ +mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* m3 FWD/BWD cost center (zeroed for the SIC message) */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* m4 skip center (zeroed) */ +mov (8) vme_msg_4<1>:UD 0x0:UD {align1}; + +/* m5 */ +mov (8) vme_msg_5<1>:UD 0x0:UD {align1}; + + +/* Use the Luma mode */ +mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1}; /* staged through tmp_reg0 so only the low byte is copied */ +mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1}; +mov (1) vme_msg_5.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1}; + +/* m6 */ +mov (8) vme_msg_6<1>:UD 0x0:UD {align1}; +mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1}; + + +/* m7 */ + +mov (8) vme_msg_7.0<1>:ud 0x0:ud {align1}; + +/* + * SIC VME message + */ + +/* Disable Intra8x8/Intra4x4 Intra-prediction */ +/* m1 */ +mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; + +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; +add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; /* combine both disable flags */ +mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; + +/* assign MB intra struct from the thread payload */ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Enable DC Haar component when calculating Haar SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_ENABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* m0 */ +mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, intra Haar */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind +
vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; +/* + * Oword Block Write message: store selected dwords of the SIC (intra) result + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; /* message header comes from obw_m0 */ + +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; /* last payload dword repeats dword 2 of the write header */ + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, Haar */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; /* dword 1 of m0 mirrors dword 0 (the window offset) */ + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* Setup the Cost center */ +/* currently four 8x8 share the same cost center */ +mov (4) vme_m3.0<2>:ud mv_cc_ref.0<0,1,0>:ud {align1}; /* destination stride 2: fills dwords 0, 2, 4, 6 */ +mov (4) vme_m3.4<2>:ud mv_cc_ref.0<0,1,0>:ud {align1}; /* fills dwords 1, 3, 5, 7 with the same value */ + +mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1}; +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* M4/M5 search path */ +mov
(1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1}; /* unlike the rows above, the top byte here is 0x00 */ + +mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1}; /* upper half of m5 is cleared */ + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR: rebuild dword 5 of m2 from the IME result */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; /* keep the low two bits of the first result word */ +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +/* IME results vme_wb1..vme_wb4 become FBR inputs m4..m7, overwriting the search path written above */ +mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, Haar, BME disable */ +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; /* adds 0x04 to the byte at m1.30, keeping what is already there */ + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; +mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) +
vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; /* test bit 0x04 of mb_hwdep */ +(-f0.0) jmpi (1) vme_run_again; /* bit set: try a second search; vme_run_again jumps back to vme_mv_output when done */ +nop; +vme_mv_output: + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; /* advance the write offset by 2 */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; +/* write FME info */ +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; + +mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1}; +/* Inter distortion of FME */ +mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1}; + +mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; /* the MB shape/mode dword that was built for the FBR message */ + +/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME MV */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; /* advance the write offset by 1 */ +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; /* NOTE(review): four payload registers are staged, but the send below uses mlen 2 (header plus one register) - confirm msg_reg2..msg_reg4 are intentionally not written out */ +/* bind index 3, write 2 oword (32 bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME RefID */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; /* advance the write offset by 8 */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, +
OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Issue message fence so that the previous write message is committed */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread: send r0 to the thread spawner with EOT set + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + + nop ; + nop ; + +word_imedian: /* subroutine: RET_ARG = median of the signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8; overwrites f0.0; entered via SAVE_RET then jmpi, left via RETURN */ + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b */ + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c <= a < b: median is a */ + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a is the smallest */ + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a < c <= b: median is c */ + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a < b < c: median is b */ + jmpi (1) cmp_end; +cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a >= b */ + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b >= c: median is b */ + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here b is the smallest */ + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* b < c <= a: median is c */ + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* b <= a < c: median is a */ +cmp_end: + RETURN {align1}; + +nop; +nop; +ref_boundary_check: /* subroutine: clamps a 48x40 reference window. In: INPUT_ARG0.0 (2 words) = requested window offset, INPUT_ARG0.8 (2 words) = base position, INPUT_ARG1.0 / INPUT_ARG1.2 = right / bottom limits (presumably picture width / height - TODO confirm), INPUT_ARG1.16 (2 words) = MV range. Out: RET_ARG (2 words) = adjusted offset relative to INPUT_ARG0.8. Overwrites f0.0, TEMP_VAR0 and TEMP_VAR1 */ + +/* The left/up coordinate of reference window */ +add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1}; +/* The right/bottom coordinate of reference window */ +add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1}; +add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1}; + +/* Firstly the MV range is checked */ +mul (2) TEMP_VAR1.16<1>:w INPUT_ARG1.16<2,2,1>:w -1:w {align1}; /* negated MV range */
+add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1}; /* lower bounds: base position minus MV range, in TEMP_VAR1.0 / TEMP_VAR1.2 */ +add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1}; /* upper bounds: base position plus MV range, in TEMP_VAR1.4 / TEMP_VAR1.6 */ + +cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w TEMP_VAR1.0<0,1,0>:w {align1}; /* left edge below the lower x bound? */ +(f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1}; /* right edge above the upper x bound? NOTE(review): TEMP_VAR0.16 / TEMP_VAR0.18 are computed once on entry and are not refreshed after the left/top values are changed - confirm that is acceptable */ +(f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1}; /* pull the window back so that left = bound - 48 */ +cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1}; /* same two checks for y, with a height of 40 */ +(f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1}; + +x_left_cmp: /* then clamp against 0 and the limits in INPUT_ARG1.0 / INPUT_ARG1.2 */ + cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) x_right_cmp; + (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1}; /* negative left edge: clamp to 0 */ + jmpi (1) y_top_cmp; +x_right_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1}; + (-f0.0) jmpi (1) y_top_cmp; + (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1}; /* right edge past the limit: left = limit - 48 */ +y_top_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) y_bottom_cmp; + (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1}; /* negative top edge: clamp to 0 */ + jmpi (1) y_bottom_end; +y_bottom_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1}; + (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1}; /* bottom edge past the limit: top = limit - 40 */ + +y_bottom_end: +mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1}; +add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; /* result = clamped left/top minus the base position, i.e. an offset again */ + RETURN {align1}; +nop; +nop; + +vme_run_again: /* second search pass around the MV predictor; entered from the main flow when bit 0x04 of mb_hwdep is set */ + +asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; /* both predictor components >> 2 (presumably quarter-pel to integer-pel - TODO confirm) */ +mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1}; /* working copy used for the magnitude test */ +add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; +and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; /* (v + 3) & ~3: round up to a multiple of 4 */ + +cmp.l.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.0<1>:w
tmp_reg0.0<0,1,0>:w -1:w {align1}; /* negate when negative, leaving the absolute value */ +cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* absolute value of the second component */ + +cmp.ge.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 4:w {align1}; /* search again only if either magnitude is at least 4 */ +(f0.0) jmpi (1) vme_start; +cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; + +jmpi (1) vme_done; + +vme_start: + mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1}; /* save the first pass's vme_wb0 / vme_wb1 so they can be put back if the second pass is not better */ + mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1}; + +/* Calibrate the ref window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; +add (2) INPUT_ARG0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; /* default (-16, -12) offset shifted by the two words at mb_ref_win.16 */ +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; /* adopt the two words returned by ref_boundary_check as the window offset */ + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, Haar */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; /* dword 1 of m0 mirrors dword 0 (the window offset) */ + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; /* m1 is rebuilt from zero for this pass */ + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* Setup the Cost center */ +/* currently four 8x8 share the same cost center */ +mov (4) vme_m3.0<2>:ud mv_cc_ref.0<0,1,0>:ud {align1}; /* destination stride 2: fills dwords 0, 2, 4, 6 */ +mov (4) vme_m3.4<2>:ud mv_cc_ref.0<0,1,0>:ud {align1}; /* fills dwords 1, 3, 5, 7 with the same value */ + +mov (8) vme_msg_3<1>:UD vme_m3.0<8,8,1>:UD {align1}; +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* M4/M5 search path */ +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD
0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_4.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_5.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_5.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_5.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_5.12<1>:UD 0x000F0F0F:UD {align1}; /* unlike the rows above, the top byte here is 0x00 */ + +mov (4) vme_msg_5.16<1>:UD 0x0:UD {align1}; /* upper half of m5 is cleared */ + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR: rebuild dword 5 of m2 from the IME result */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; /* keep the low two bits of the first result word */ +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +/* IME results vme_wb1..vme_wb4 become FBR inputs m4..m7, overwriting the search path written above */ +mov (8) vme_msg_4.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, Haar, BME disable */ +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; /* adds 0x04 to the byte at m1.30 */ + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; +mov (8) vme_msg_3.0<1>:UD vme_m3.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, +
VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +cmp.l.f0.0 (1) null:uw vme_wb0.8<0,1,0>:uw tmp_vme_wb0.8<0,1,0>:uw {align1}; /* is the second pass's word at vme_wb0.8 lower than the saved first-pass value? (presumably the inter distortion - TODO confirm) */ +(f0.0) jmpi (1) vme_done; /* yes: keep the second-pass result */ +mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1}; /* no: restore the first-pass vme_wb0 / vme_wb1. NOTE(review): only these two registers are saved and restored; confirm the other writeback registers and vme_m2.20 need no restore */ +mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1}; + +vme_done: + jmpi (1) vme_mv_output; /* rejoin the main flow at the output stage */ +nop; +nop; +nop; + diff --git a/src/shaders/vme/mpeg2_inter_gen8.g8a b/src/shaders/vme/mpeg2_inter_gen8.g8a new file mode 100644 index 0000000..26f94a7 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_gen8.g8a @@ -0,0 +1,3 @@ +#include "vme8.inc" +#include "vme75_mpeg2.inc" +#include "mpeg2_inter_gen8.asm" diff --git a/src/shaders/vme/mpeg2_inter_gen8.g8b b/src/shaders/vme/mpeg2_inter_gen8.g8b new file mode 100644 index 0000000..6686c9f --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_gen8.g8b @@ -0,0 +1,371 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000018 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x2a401248, 0x16450098, 0x00040004 }, + { 0x00200001, 0x2a501248, 0x00450094, 0x00000000 }, + { 0x00600001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2ae00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000720 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00600060 }, + {
{ 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2ae00e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24001a68, 0x1e000400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2af41e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02180203 }, + { 0x00200001, 0x2ae40208, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2af01e68, 0x18000000, 0x00010001 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00100010 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000360 }, + { 0x00000001, 0x2b000e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b141e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02180203 }, + { 0x00200001, 0x2b040208, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b101e68, 0x18000000, 0x00010001 }, + { 
0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00080008 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000110 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x00000040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000180 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02180203 }, + { 0x00200001, 0x2b240208, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000130 }, + { 0x00600001, 0x2b400608, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a5, 0x00040004 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000f0 }, + { 0x00000001, 0x2b200e28, 0x08000000, 0x00000001 }, + { 0x00200001, 0x24002248, 0x004500a0, 0x00000000 }, + { 0x00200040, 0x24001a68, 0x1e450400, 0xffffffff }, + { 0x00000041, 0x2b481208, 0x120000a2, 0x00000402 }, + { 0x00000040, 0x2b480208, 0x12000b48, 0x00000400 }, + { 0x00000041, 0x2b480208, 0x06000b48, 0x00000018 }, + { 0x00000001, 0x2b542288, 0x00000014, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280303 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00210001, 0x2b341e68, 0x18000000, 0xffffffff }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a88, 0x0e000b40, 0x02180203 }, + { 
0x00200001, 0x2b240208, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b301e68, 0x18000000, 0x00010001 }, + { 0x00000040, 0x24000a28, 0x0a000b00, 0x00000b20 }, + { 0x01000010, 0x20000a20, 0x0e000400, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x02000010, 0x20000a20, 0x0e000ae0, 0x00000000 }, + { 0x00010001, 0x2b040208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b240208, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b141248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2b341248, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2ac00208, 0x00000ae4, 0x00000000 }, + { 0x00110001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000190 }, + { 0x00600001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000af4, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000ae4, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b14, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000b34, 0x00000000 }, + { 0x00010040, 0x24001a68, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x24040208, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20001a20, 0x1e000400, 0x00010001 }, + { 0x00010001, 0x2ac00208, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000001, 0x2fa01a68, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000780 }, + { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000720 }, + { 
0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, + { 0x00200001, 0x2a201a68, 0x00450ae4, 0x00000000 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 }, + { 0x00000001, 0x2fa00208, 0x00000440, 0x00000000 }, + { 0x00000001, 0x2fa80208, 0x00000448, 0x00000000 }, + { 0x00600001, 0x2fc00208, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000007b0 }, + { 0x00200001, 0x24401a68, 0x00450fe4, 0x00000000 }, + { 0x00600001, 0x25600208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28a00608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00010001 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00bb00bb }, + { 0x00000001, 0x28a42288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00600001, 0x28e00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24600608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00020002 }, + { 0x00000040, 0x24001248, 0x16000400, 0x00040004 }, + { 0x00000001, 0x247c2288, 0x00000400, 0x00000000 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00000000 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 
0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 }, + { 0x00000001, 0x24561648, 0x10000000, 0x28302830 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680608, 0x00000000, 0x30003030 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00400001, 0x45800208, 0x00000a20, 0x00000000 }, + { 0x00400001, 0x45840208, 0x00000a20, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28900608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28940608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 }, + { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, + { 
0x00000005, 0x24001248, 0x16000180, 0x00030003 }, + { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00241000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00040004 }, + { 0x00000040, 0x247e2288, 0x2200047e, 0x00000400 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x01000005, 0x20001240, 0x160000a6, 0x00040004 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x000004a0 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000574, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0003 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000001 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d0200, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00000040, 0x24880208, 0x06000488, 0x00000008 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0240, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x0219e003 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 
0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa4 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000070 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000a0 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000060 }, + { 0x04000010, 0x20001a60, 0x1a000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x04000010, 0x20001a60, 0x1a000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe41a68, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe41a68, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00200040, 0x2f601a68, 0x1a450fa8, 0x00450fa0 }, + { 0x00000040, 0x2f701a68, 0x1e000f60, 0x00300030 }, + { 0x00000040, 0x2f721a68, 0x1e000f62, 0x00280028 }, + { 0x00200041, 0x2f901a68, 0x1e450fd0, 0xffffffff }, + { 0x00200040, 0x2f801a68, 0x1a450fa8, 0x00450f90 }, + { 0x00200040, 0x2f841a68, 0x1a450fa8, 0x00450fd0 }, + { 0x05000010, 0x20001a60, 0x1a000f60, 0x00000f80 }, + { 0x00010001, 0x2f601a68, 0x00000f80, 0x00000000 }, + { 0x03000010, 0x20001a60, 0x1a000f70, 0x00000f84 }, + { 0x00010040, 0x2f601a68, 0x1e000f84, 0xffd0ffd0 }, + { 0x05000010, 0x20001a60, 0x1a000f62, 0x00000f82 }, + { 0x00010001, 0x2f621a68, 0x00000f82, 0x00000000 }, + { 0x03000010, 0x20001a60, 0x1a000f72, 0x00000f86 }, + { 0x00010040, 0x2f621a68, 0x1e000f86, 0xffd8ffd8 }, + { 0x05000010, 0x20001a60, 0x1e000f60, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000020 }, + { 
0x00010001, 0x2f601e68, 0x18000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x03000010, 0x20001a60, 0x1a000f70, 0x00000fc0 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000010 }, + { 0x00010040, 0x2f601a68, 0x1e000fc0, 0xffd0ffd0 }, + { 0x05000010, 0x20001a60, 0x1e000f62, 0x00000000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000020 }, + { 0x00010001, 0x2f621e68, 0x18000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000020 }, + { 0x03000010, 0x20001a60, 0x1a000f72, 0x00000fc2 }, + { 0x00010040, 0x2f621a68, 0x1e000fc2, 0xffd8ffd8 }, + { 0x00200041, 0x2f801a68, 0x1e450fa8, 0xffffffff }, + { 0x00200040, 0x2fe41a68, 0x1a450f60, 0x00450f80 }, + { 0x00000001, 0x34000200, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 }, + { 0x00200001, 0x24001a68, 0x00450a80, 0x00000000 }, + { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 }, + { 0x00200005, 0x2a901248, 0x16450a88, 0xfffcfffc }, + { 0x05000010, 0x20001a60, 0x1e000400, 0x00000000 }, + { 0x00010041, 0x24001a68, 0x1e000400, 0xffffffff }, + { 0x05000010, 0x20001a60, 0x1e000402, 0x00000000 }, + { 0x00010041, 0x24021a68, 0x1e000402, 0xffffffff }, + { 0x04000010, 0x20001a60, 0x1e000400, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x04000010, 0x20001a60, 0x1e000402, 0x00040004 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000010 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000003a0 }, + { 0x00600001, 0x2c800208, 0x008d0180, 0x00000000 }, + { 0x00600001, 0x2ca00208, 0x008d01a0, 0x00000000 }, + { 0x00000001, 0x24401e68, 0x18000000, 0xfff0fff0 }, + { 0x00000001, 0x24421e68, 0x18000000, 0xfff4fff4 }, + { 0x00000001, 0x2fa80208, 0x00000448, 0x00000000 }, + { 0x00200040, 0x2fa01a68, 0x1a450440, 0x00450a90 }, + { 0x00600001, 0x2fc00208, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, + { 
0x00000020, 0x34000000, 0x0e001400, 0xfffffca0 }, + { 0x00200001, 0x24401a68, 0x00450fe4, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 }, + { 0x00000001, 0x24561648, 0x10000000, 0x28302830 }, + { 0x00000001, 0x24440208, 0x00000440, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x24600608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24600608, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24642288, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680608, 0x00000000, 0x30003030 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00400001, 0x45800208, 0x00000a20, 0x00000000 }, + { 0x00400001, 0x45840208, 0x00000a20, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28800608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28900608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28940608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28980608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x289c0608, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28a00608, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28a40608, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28a80608, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x28ac0608, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28b00608, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21800a08, 0x0e000800, 0x0c784000 }, + { 0x00000001, 0x25740608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25752288, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25762288, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24001248, 0x16000180, 0x00030003 }, + { 0x00000001, 0x25742288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800208, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28a00208, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28c00208, 0x008d01e0, 0x00000000 }, + { 
0x00600001, 0x28e00208, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0608, 0x00000000, 0x00241000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00040004 }, + { 0x00000040, 0x247e2288, 0x2200047e, 0x00000400 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400208, 0x008d0560, 0x00000000 }, + { 0x00600001, 0x28600208, 0x008d0580, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10786000 }, + { 0x05000010, 0x20001240, 0x12000188, 0x00000c88 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000020 }, + { 0x00600001, 0x21800208, 0x008d0c80, 0x00000000 }, + { 0x00600001, 0x21a00208, 0x008d0ca0, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0xfffff6f0 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/vme/mpeg2_inter_haswell.asm b/src/shaders/vme/mpeg2_inter_haswell.asm new file mode 100644 index 0000000..c224cf0 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_haswell.asm @@ -0,0 +1,860 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * Author : Zhao Yakui <yakui.zhao@intel.com> + */ +// Modual name: mpeg2_inter_haswell.asm +// +// Make MPEG2 inter predition estimation for Inter-frame on Haswell +// + +// +// Now, begin source code.... 
+// + +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __INTER_START: zero the temporaries and set up vme_m0 / obw_m0 + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; /* obw_m0.8 = (y * w_in_mb + x) * 24 */ +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; +mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; + +/* skip the neighbour MV prediction when bit 2 (0x04) of mb_hwdep is clear */ +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; +/* read back the data for MB A */ +/* the layout of MB result is: rX.0(Available), rX.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID) +*/ +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV. 
mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; /* MB A is the left neighbour: x - 1 */ +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV. 
mba_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists: mbb_result.0 stays zero and the ref IDs of both mbb_result and mbc_result are set to -1 */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; /* MB B is the top neighbour: y - 1 */ +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbc_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; + +mbc_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV. 
mba_flag is zero */ +/* Based on the H.264 spec, MB D is used in place of MB C when MB C doesn't exist (mbd_start stores its result in mbc_result) */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; /* MB C is the top-right neighbour: y - 1, x + 1 */ +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB 
{align1}; +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; /* MB D is the top-left neighbour: x - 1, y - 1 */ +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_4, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 2 + {align1}; + +cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; +/* Read MV for MB D */ +/* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ +/* NOTE(review): the destination below is typed :ub, while the MB A/B/C MV reads use :ud; confirm whether this is intentional */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +mb_mvp_start: +/*TODO: Add the skip prediction */ +/* Check whether both MB B and C are unavailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; +cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; +(-f0.0) jmpi (1) mb_median_start; +/* neither B nor C is available: when MB A is available, copy its MV and ref ID into the B and C slots */ +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; +(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov 
(1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; +(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; +jmpi (1) __mb_hwdep_end; + +mb_median_start: +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; + +__mb_hwdep_end: + +mov (2) mv_cc_ref.0<1>:w mba_result.4<2,2,1>:w {align1}; + +/* Calibrate the ref window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; + +mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w 
RET_ARG<2,2,1>:w {align1}; + +/* m2, get the MV/Mb cost passed from constant buffer when +spawning thread by MEDIA_OBJECT */ +mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* m3 */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* the neighbour pixel is zero for MPEG2 Intra-prediction */ + +/* m4 */ +mov (8) vme_msg_4<1>:UD 0:UD {align1}; +mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1}; +/* Use the Luma mode */ +mov (1) vme_msg_4.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; +mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1}; +mov (1) vme_msg_4.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1}; + +/* m5 */ +mov (8) vme_msg_5<1>:UD 0x0:UD {align1}; +mov (1) vme_msg_5.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_5.28<1>:UD 0x010101:UD {align1}; + + +/* m6 */ +mov (8) vme_msg_6.0<1>:UD 0:Ud {align1}; + +/* + * SIC VME message + */ +/* m0 */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* Disable Intra8x8/Intra4x4 Intra-prediction */ +/* m1 */ +mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; + +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; +add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; +mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Enable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_ENABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */ +/* m0 */ +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + 
BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; + +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +/* Set the MV cost center */ +mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; + +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; +/* M3/M4 search path */ + +mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 
0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_3.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_4.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */ + +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +and.z.f0.0 (1) null:uw 
mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(-f0.0) jmpi (1) vme_run_again; +nop; +vme_mv_output: + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; +/* write FME info */ +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; + +mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1}; +/* Inter distortion of FME */ +mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1}; + +mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; + +/* bind index 3, write 1 oword (16bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + +/* Write FME/BME MV */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; +/* bind index 3, write 2 oword (32 bytes), msg type: 8(OWord Block Write) */ +/* NOTE(review): this send uses OBW_CONTROL_2 and mlen 2, the same as the 2-oword RefID write that follows, so msg_reg2..msg_reg4 filled above look unused by it; confirm against the message register layout */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Write FME/BME RefID */ +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + +/* Issue message fence so that the previous 
write message is committed */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + nop ; + nop ; + +word_imedian: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + jmpi (1) cmp_end; +cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; +cmp_end: + RETURN {align1}; + +nop; +nop; + +ref_boundary_check: + +/* The left/up coordinate of reference window */ +add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1}; +/* The right/bottom coordinate of reference window */ +add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1}; +add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1}; + +/* Firstly the MV range is checked */ +mul (2) TEMP_VAR1.16<1>:w INPUT_ARG1.16<2,2,1>:w -1:w {align1}; +add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1}; +add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1}; + +cmp.l.f0.0 
(1) null:w TEMP_VAR0.0<0,1,0>:w TEMP_VAR1.0<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1}; +cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1}; + +x_left_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) x_right_cmp; + (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1}; + jmpi (1) y_top_cmp; +x_right_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1}; + (-f0.0) jmpi (1) y_top_cmp; + (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1}; +y_top_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) y_bottom_cmp; + (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1}; + jmpi (1) y_bottom_end; +y_bottom_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1}; + (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1}; + +y_bottom_end: +mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1}; +add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; + RETURN {align1}; +nop; +nop; + +vme_run_again: + +asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; +mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1}; +add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; +and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; + +cmp.l.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; + +cmp.ge.f0.0 (1) 
null:w tmp_reg0.0<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; +cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; + +jmpi (1) vme_done; + +vme_start: + mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1}; + mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1}; + +/* Calibrate the ref window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; +mov (4) INPUT_ARG0.0<1>:ud vme_m0.0<4,4,1>:ud {align1}; +add (2) INPUT_ARG0.0<1>:w INPUT_ARG0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; + +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* the Max MV number is passed by constant buffer */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; +/* Set the MV cost center */ +mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; +/* M3/M4 search path */ + +mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; +mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) 
vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; + +mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; +mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; +mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; +mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; + +mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; + +send (8) + vme_msg_ind + vme_wb<1>:UD + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_IME_MESSAGE_TYPE + ) + mlen ime_vme_msg_length + rlen vme_wb_length {align1}; + +/* Set Macroblock-shape/mode for FBR */ + +mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; +mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; +mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; + +and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; +mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* Send FBR message into CRE */ + +mov (8) vme_msg_3.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_4.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; +mov (8) vme_msg_5.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; +mov (8) vme_msg_6.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; + +mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */ + +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + +mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_FBR_MESSAGE_TYPE + ) + mlen fbr_vme_msg_length + rlen vme_wb_length + {align1}; + +cmp.l.f0.0 (1) null:uw vme_wb0.8<0,1,0>:uw tmp_vme_wb0.8<0,1,0>:uw {align1}; +(f0.0) jmpi (1) vme_done; +mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1}; +mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1}; + +vme_done: + jmpi (1) vme_mv_output; +nop; +nop; 
+nop; + diff --git a/src/shaders/vme/mpeg2_inter_frame_haswell.g75a b/src/shaders/vme/mpeg2_inter_haswell.g75a index 662c76f..355812c 100644 --- a/src/shaders/vme/mpeg2_inter_frame_haswell.g75a +++ b/src/shaders/vme/mpeg2_inter_haswell.g75a @@ -1,3 +1,3 @@ #include "vme75.inc" #include "vme75_mpeg2.inc" -#include "inter_frame_haswell.asm" +#include "mpeg2_inter_haswell.asm" diff --git a/src/shaders/vme/mpeg2_inter_frame_haswell.g75b b/src/shaders/vme/mpeg2_inter_haswell.g75b index 35175c7..25c629d 100644 --- a/src/shaders/vme/mpeg2_inter_frame_haswell.g75b +++ b/src/shaders/vme/mpeg2_inter_haswell.g75b @@ -2,44 +2,21 @@ { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, { 0x00800001, 0x24800061, 0x00000000, 0x00000000 }, { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 }, - { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, - { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, - { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 }, - { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, - { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, - { 0x00000001, 0x24340231, 0x00000014, 0x00000000 }, { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, { 0x00000041, 0x24880c21, 0x00000488, 0x00000018 }, { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, - { 0x04600031, 0x23801cb1, 0x00000800, 0x02190004 }, - { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 }, - { 0x04600031, 0x23a01cb1, 0x00000800, 0x02290004 }, - { 0x00200009, 0x24002e25, 0x004500a0, 0x00030003 }, - { 0x00000041, 0x24003ca5, 0x00000400, 0x00020002 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, - { 0x00000040, 0x24043ca5, 0x00000404, 
0xffffffff }, - { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, - { 0x04600031, 0x26001cb1, 0x00000800, 0x02190006 }, - { 0x00200009, 0x24202e25, 0x004500a0, 0x00030003 }, - { 0x00000041, 0x24203ca5, 0x00000420, 0x00020002 }, - { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, - { 0x00000001, 0x242800e1, 0x00000000, 0x00070003 }, - { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 }, - { 0x04600031, 0x26201cb1, 0x00000800, 0x02190006 }, + { 0x00200009, 0x2a402d29, 0x00450098, 0x00040004 }, + { 0x00200001, 0x2a500129, 0x00450094, 0x00000000 }, { 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 }, { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x00000740 }, { 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 }, { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 }, { 0x00600001, 0x2b200061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000710 }, { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, { 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff }, @@ -56,8 +33,8 @@ { 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff }, { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, - { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480403 }, - { 0x00200001, 0x2ae40021, 0x00450bc8, 0x00000000 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2ae40021, 0x00450ba0, 0x00000000 }, { 0x00000001, 0x2af001ed, 0x00000000, 0x00010001 }, { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, @@ -76,8 +53,8 @@ { 0x00210001, 0x2b1401ed, 0x00000000, 0xffffffff }, { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, - { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480403 }, - { 0x00200001, 0x2b040021, 0x00450bf0, 
0x00000000 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b040021, 0x00450ba0, 0x00000000 }, { 0x00000001, 0x2b1001ed, 0x00000000, 0x00010001 }, { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, { 0x01000005, 0x20002e28, 0x000000a5, 0x00080008 }, @@ -95,8 +72,8 @@ { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, { 0x00010020, 0x34001c00, 0x00001400, 0x00000170 }, { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, - { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480403 }, - { 0x00200001, 0x2b240021, 0x00450bf0, 0x00000000 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 }, { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 }, { 0x00000020, 0x34001c00, 0x00001400, 0x00000120 }, { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, @@ -114,13 +91,13 @@ { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 }, { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 }, - { 0x0a800031, 0x2ba01cb1, 0x00000b40, 0x02480403 }, - { 0x00200001, 0x2b240021, 0x00450c18, 0x00000000 }, + { 0x0a800031, 0x2ba01cb1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 }, { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 }, { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x00000080 }, - { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000001 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, @@ -145,38 +122,43 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000850 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000760 }, { 0x00000001, 
0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000007f0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000700 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, - { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, - { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, - { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc }, + { 0x00200001, 0x2a2001ad, 0x00450ae4, 0x00000000 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, + { 0x00000001, 0x2fa00021, 0x00000440, 0x00000000 }, + { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 }, + { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000790 }, + { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, - { 0x00000001, 0x23800061, 0x00000000, 0x00000000 }, - { 0x00000005, 0x23840c21, 0x00000384, 0xff000000 }, - { 0x00600001, 0x28800021, 0x008d0380, 0x00000000 }, + { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00010001 }, + { 0x00000001, 0x28850231, 0x00000400, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00bb00bb }, + { 0x00000001, 0x28840231, 0x00000400, 0x00000000 }, { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 }, - { 0x00800001, 0x28a00231, 0x00cf03a3, 0x00000000 }, { 0x00000001, 0x28b00061, 0x00000000, 0x11111111 }, { 0x00000001, 0x28bc0061, 0x00000000, 0x00010101 }, - { 0x00000001, 0x28b40129, 0x00000606, 0x00000000 }, - { 0x00400001, 0x28d00021, 0x00690608, 0x00000000 }, - { 0x00600001, 0x28c00129, 0x00ae0622, 
0x00000000 }, + { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 }, { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, - { 0x00000001, 0x24000169, 0x00000000, 0x00010001 }, - { 0x00000001, 0x28850231, 0x00000400, 0x00000000 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 }, - { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00020002 }, + { 0x00000040, 0x24002d29, 0x00000400, 0x00040004 }, + { 0x00000001, 0x247c0231, 0x00000400, 0x00000000 }, { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 }, - { 0x00000001, 0x24000169, 0x00000000, 0x00200020 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00000000 }, { 0x00000001, 0x247e0231, 0x00000400, 0x00000000 }, { 0x00000001, 0x244c0061, 0x00000000, 0x00800000 }, { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, @@ -194,24 +176,13 @@ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, { 0x00000001, 0x244c0061, 0x00000000, 0x7e200000 }, { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, - { 0x00000001, 0x24400021, 0x00000448, 0x00000000 }, - { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 }, - { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 }, - { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, - { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, - { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, - { 0x00010040, 0x24403dad, 0x00000440, 0x000c000c }, - { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, - { 0x00010040, 0x24423dad, 0x00000442, 0x00080008 }, - { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, - { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 }, { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, - { 0x00000001, 0x24700021, 0x00000ac0, 
0x00000000 }, - { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 }, + { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 }, + { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, @@ -237,11 +208,16 @@ { 0x00600001, 0x28800021, 0x008d01c0, 0x00000000 }, { 0x00600001, 0x28a00021, 0x008d01e0, 0x00000000 }, { 0x00600001, 0x28c00021, 0x008d0200, 0x00000000 }, - { 0x00000001, 0x244c0061, 0x00000000, 0x00243000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00241000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00040004 }, + { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 }, { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 }, + { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x000004a0 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x00000040, 0x24880c21, 0x00000488, 0x00000002 }, { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, @@ -255,7 +231,7 @@ { 0x00600001, 0x28400021, 0x008d01c0, 0x00000000 }, { 0x00600001, 0x28600021, 0x008d01e0, 0x00000000 }, { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 }, - { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 }, { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0240, 0x00000000 }, @@ -265,20 +241,6 @@ { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, - { 0x06000010, 0x200035ac, 0x00000fa0, 0x00000fa4 }, - { 0x00010001, 0x2f6001ad, 0x00000fa0, 0x00000000 }, - { 0x00110001, 
0x2f6001ad, 0x00000fa4, 0x00000000 }, - { 0x06000010, 0x200035ac, 0x00000f60, 0x00000fa8 }, - { 0x00010001, 0x2fe401ad, 0x00000f60, 0x00000000 }, - { 0x00110001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, - { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, - { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 }, - { 0x00010001, 0x2f6001ad, 0x00000fa0, 0x00000000 }, - { 0x00110001, 0x2f6001ad, 0x00000fa4, 0x00000000 }, - { 0x04000010, 0x200035ac, 0x00000f60, 0x00000fa8 }, - { 0x00010001, 0x2fe401ad, 0x00000f60, 0x00000000 }, - { 0x00110001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, - { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 }, { 0x00010020, 0x34001c00, 0x00001400, 0x00000070 }, { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 }, @@ -295,3 +257,110 @@ { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, { 0x00110001, 0x2fe401ad, 0x00000fa0, 0x00000000 }, { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00200040, 0x2f6035ad, 0x00450fa8, 0x00450fa0 }, + { 0x00000040, 0x2f703dad, 0x00000f60, 0x00300030 }, + { 0x00000040, 0x2f723dad, 0x00000f62, 0x00280028 }, + { 0x00200041, 0x2f903dad, 0x00450fd0, 0xffffffff }, + { 0x00200040, 0x2f8035ad, 0x00450fa8, 0x00450f90 }, + { 0x00200040, 0x2f8435ad, 0x00450fa8, 0x00450fd0 }, + { 0x05000010, 0x200035ac, 0x00000f60, 0x00000f80 }, + { 0x00010001, 0x2f6001ad, 0x00000f80, 0x00000000 }, + { 0x03000010, 0x200035ac, 0x00000f70, 0x00000f84 }, + { 0x00010040, 0x2f603dad, 0x00000f84, 0xffd0ffd0 }, + { 0x05000010, 0x200035ac, 0x00000f62, 0x00000f82 }, + { 0x00010001, 0x2f6201ad, 0x00000f82, 0x00000000 }, + { 0x03000010, 0x200035ac, 0x00000f72, 0x00000f86 }, + { 0x00010040, 0x2f623dad, 0x00000f86, 0xffd8ffd8 }, + { 0x05000010, 0x20003dac, 0x00000f60, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x00010001, 0x2f6001ed, 0x00000000, 0x00000000 }, + { 
0x00000020, 0x34001c00, 0x00001400, 0x00000030 }, + { 0x03000010, 0x200035ac, 0x00000f70, 0x00000fc0 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x00010040, 0x2f603dad, 0x00000fc0, 0xffd0ffd0 }, + { 0x05000010, 0x20003dac, 0x00000f62, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x00010001, 0x2f6201ed, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x03000010, 0x200035ac, 0x00000f72, 0x00000fc2 }, + { 0x00010040, 0x2f623dad, 0x00000fc2, 0xffd8ffd8 }, + { 0x00200041, 0x2f803dad, 0x00450fa8, 0xffffffff }, + { 0x00200040, 0x2fe435ad, 0x00450f60, 0x00450f80 }, + { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, + { 0x00200001, 0x240001ad, 0x00450a80, 0x00000000 }, + { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, + { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc }, + { 0x05000010, 0x20003dac, 0x00000400, 0x00000000 }, + { 0x00010041, 0x24003dad, 0x00000400, 0xffffffff }, + { 0x05000010, 0x20003dac, 0x00000402, 0x00000000 }, + { 0x00010041, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x04000010, 0x20003dac, 0x00000400, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000030 }, + { 0x04000010, 0x20003dac, 0x00000402, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000380 }, + { 0x00600001, 0x2c800021, 0x008d0180, 0x00000000 }, + { 0x00600001, 0x2ca00021, 0x008d01a0, 0x00000000 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, + { 0x00400001, 0x2fa00021, 0x00690440, 0x00000000 }, + { 0x00200040, 0x2fa035ad, 0x00450fa0, 0x00450a90 }, + { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xfffffca0 }, + { 
0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x7e200000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, + { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, + { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 }, + { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, + { 0x00000001, 0x28600061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28640061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28700061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28740061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f }, + { 0x00000001, 0x28800061, 0x00000000, 0x01010101 }, + { 0x00000001, 0x28840061, 0x00000000, 0x10010101 }, + { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f }, + { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f }, + { 0x00400001, 0x28900061, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21801ca1, 0x00000800, 0x0a784000 }, + { 0x00000001, 0x25740061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x25750231, 0x00000199, 0x00000000 }, + { 0x00000001, 0x25760231, 0x0000019a, 0x00000000 }, + { 0x00000005, 0x24002d29, 0x00000180, 0x00030003 }, + { 0x00000001, 0x25740231, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28600021, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x28800021, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x28a00021, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28c00021, 0x008d0200, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x00241000 }, + { 
0x00000001, 0x24000169, 0x00000000, 0x00040004 }, + { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, + { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 }, + { 0x05000010, 0x20002528, 0x00000188, 0x00000c88 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000020 }, + { 0x00600001, 0x21800021, 0x008d0c80, 0x00000000 }, + { 0x00600001, 0x21a00021, 0x008d0ca0, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xfffff710 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/vme/mpeg2_inter_ivb.asm b/src/shaders/vme/mpeg2_inter_ivb.asm new file mode 100644 index 0000000..dde9643 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_ivb.asm @@ -0,0 +1,705 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. + * Authors: Zhao Yakui <yakui.zhao@intel.com> + * + */ +// Modual name: mpeg2_inter_ivb.asm +// +// Make inter predition estimation for Mpeg2 Inter frame on Ivy +// + +// +// Now, begin source code.... 
+// +#define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud +#define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud + +/* + * __START + */ +__INTER_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; +mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; + +mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; +mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; +mov (8) mba_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; +mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; +/* read back the data for MB A */ +/* the layout of MB result is: rX.0(Available), rX.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), +* rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID) +*/ +mba_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; +/* MB A doesn't exist. Zero MV. 
mba_flag is zero and ref ID = -1 */ +(f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mbb_start; +mov (1) mba_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; +/* bind index 3, read 1 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_0, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; +(-f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; +(-f0.0) jmpi (1) mbb_start; + +mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; +/* Read MV for MB A */ +/* bind index 3, read 2 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* MV */ +mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; + +mbb_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; +/* MB B doesn't exist. Zero MV. 
mba_flag is zero */ +/* If MB B doesn't exist, neither MB C nor D exists */ +(f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; + +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_0, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; +(-f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; +(-f0.0) jmpi (1) mbc_start; + +mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; +/* Read MV for MB B */ +/* bind index 3, read 2 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; + +mbc_start: +mov (8) mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub 
INTRA_PRED_AVAIL_FLAG_C:uw {align1}; +/* MB C doesn't exist. Zero MV. mba_flag is zero */ +/* Based on h264 spec the MB D will be replaced if MB C doesn't exist */ +(f0.0) jmpi (1) mbd_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; +add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_0, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ +and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; +(-f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(-f0.0) jmpi (1) mb_mvp_start; +mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; +/* Read MV for MB C */ +/* bind index 3, read 2 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; +/* TODO: RefID is required after multi-references are added */ +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +jmpi (1) mb_mvp_start; +mbd_start: +mov (8) 
mb_msg0.0<1>:ud 0:ud {align1}; +and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; +(f0.0) jmpi (1) mb_mvp_start; +mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; +mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; +add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; +mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; + +mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1}; +mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) mb_msg_tmp.8<1>:ud mb_msg0.8<0,1,0>:ud {align1}; + +add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; +/* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_0, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +and.z.f0.0 (1) null<1>:ud mb_mode_wb.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; +(-f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; +(-f0.0) jmpi (1) mb_mvp_start; + +mov (1) mb_msg0.8<1>:UD mb_msg_tmp.8<0,1,0>:ud {align1}; +/* Read MV for MB D */ +/* bind index 3, read 2 oword (16bytes), msg type: 0(OWord Block Read) */ +send (16) + mb_ind + mb_mv0.0<1>:ub + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +/* TODO: RefID is required after multi-references are added */ + +/* Forward MV */ +mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; +mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; + +mb_mvp_start: +/* TODO: Add the skip prediction */ +/* Check whether both MB B and C are unavailable */ +add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; +cmp.z.f0.0 (1) null:d 
tmp_reg0.0<0,1,0>:d 0:d {align1}; +(-f0.0) jmpi (1) mb_median_start; +cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; +(f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +(f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; +(-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; +jmpi (1) __mb_hwdep_end; + +mb_median_start: +/* check whether only one neighbour MB has the same ref ID with the current MB */ +mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; +cmp.z.f0.0 (1) null:d mba_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbb_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; +cmp.z.f0.0 (1) null:d mbc_result.20<1>:w 0:w {align1}; +(f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; +cmp.e.f0.0 (1) null:d tmp_reg0.0<1>:w 1:w {align1}; +(f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; +(f0.0) jmpi (1) __mb_hwdep_end; + +mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; +SAVE_RET {align1}; + jmpi (1) word_imedian; +mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; +mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; +SAVE_RET {align1}; +jmpi (1) word_imedian; +mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; + +__mb_hwdep_end: + +mov (2) mv_cc_ref.0<1>:w 
mba_result.4<2,2,1>:w {align1}; + +/* Calibrate the ref window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; +mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1}; +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; + +/* m2 */ +mov (8) vme_msg_2<1>:UD 0x0:UD {align1}; + +/* m3 */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; +/* Use the Luma mode */ +mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1}; +mov (1) vme_msg_3.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1}; + +/* m4 */ +mov (8) vme_msg_4<1>:UD 0x0:UD {align1}; +mov (1) vme_msg_4.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + + +/* m1 */ +mov (8) vme_m1.0<1>:ud 0x0:ud {align1}; +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; +add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; +mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; +/* m1 */ +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + + +/* M0 */ +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1}; +/* 16x16 Source, 1/2 pixel, Haar */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; +add (2) vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +add (2) vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* m1 */ + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* MV num is passed by constant buffer.
R4.28 */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +add (1) vme_m1.4<1>:UD vme_m1.4<0,1,0>:UD FB_PRUNING_DISABLE:UD {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; + +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +/* Set the MV cost center */ +mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + + +send (8) + vme_msg_ind + vme_wb + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_MESSAGE_TYPE_MIXED + ) + mlen vme_msg_length + rlen vme_inter_wb_length + {align1}; + +and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; +(-f0.0) jmpi (1) vme_run_again; + +vme_mv_output: + +and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; + +(-f0.0)jmpi (1) __INTRA_INFO ; + +__INTER_INFO: +/* Write MV pairs */ +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; + +mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; + +/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + + +mov (1) msg_reg1.0<1>:ud vme_wb0.0<0,1,0>:ud {align1} ; +mov (1) msg_reg1.4<1>:UD vme_wb0.28<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:ud tmp_ud1<0,1,0>:ud {align1} ; +mov (1) msg_reg1.12<1>:ud vme_wb0.0<0,1,0>:ud {align1} ; +mov (1) msg_reg1.16<1>:ud 0x25:ud {align1} ; +jmpi (1) __OUTPUT_INFO; + +__INTRA_INFO: +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; +mov (1) msg_reg1.16<1>:ud 0x35:ud {align1} ; + 
+__OUTPUT_INFO: + +mov (1) msg_reg1.20<1>:ud obw_m0.8<0,1,0>:ud {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1}; +mov (8) msg_reg0.0<1>:ud obw_m0.0<8,8,1>:ud {align1}; + + +/* bind index 3, write 1 oword, msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +/* Issue message fence so that the previous write message is committed */ +send (16) + mb_ind + mb_wb.0<1>:ud + NULL + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_FENCE, + OBR_MF_COMMIT, + OBR_BIND_IDX, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; + + + nop ; + nop ; + +word_imedian: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_a_ge_b; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + jmpi (1) cmp_end; +cmp_a_ge_b: + cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; + (f0.0) jmpi (1) cmp_end; + cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; + (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; +cmp_end: + RETURN {align1}; + +nop; +nop; + +ref_boundary_check: + +/* The left/up coordinate of reference 
window */ +add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1}; +/* The right/bottom coordinate of reference window */ +add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1}; +add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1}; + +/* Firstly the MV range is checked */ +mul (2) TEMP_VAR1.16<1>:w INPUT_ARG1.16<2,2,1>:w -1:w {align1}; +add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1}; +add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1}; + +cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w TEMP_VAR1.0<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1}; +cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1}; +(f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1}; +cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1}; +(f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1}; + + +x_left_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) x_right_cmp; + (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1}; + jmpi (1) y_top_cmp; +x_right_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1}; + (-f0.0) jmpi (1) y_top_cmp; + (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1}; +y_top_cmp: + cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1}; + (-f0.0) jmpi (1) y_bottom_cmp; + (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1}; + jmpi (1) y_bottom_end; +y_bottom_cmp: + cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1}; + (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1}; + +y_bottom_end: +mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1}; +add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; + RETURN {align1}; 
+nop; +nop; + +vme_run_again: + +asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; +mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1}; +add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; +and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; + +cmp.l.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; +cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1}; +(f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; + +cmp.ge.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; +cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1}; +(f0.0) jmpi (1) vme_start; + +jmpi (1) vme_done; + +vme_start: + mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1}; + mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1}; + +/* Calibrate the ref window for MPEG2 */ +mov (1) vme_m0.0<1>:W -16:W {align1}; +mov (1) vme_m0.2<1>:W -12:W {align1}; +mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; +add (2) INPUT_ARG0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; +mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; + +SAVE_RET {align1}; +jmpi (1) ref_boundary_check; +mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; + +/* m2 */ +mov (8) vme_msg_2<1>:UD 0x0:UD {align1}; + +/* m3 */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* m4 */ +mov (8) vme_msg_4<1>:UD 0x0:UD {align1}; + + +/* m1 */ +mov (8) vme_m1.0<1>:ud 0x0:ud {align1}; +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; +mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; +add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; +mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; +/* m1 */ +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + + +/* M0 */ +/* IME search */ +mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + 
INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1}; +/* 16x16 Source, 1/2 pixel, Haar */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* m1 */ + +mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; +/* MV num is passed by constant buffer. R4.28 */ +mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; +add (1) vme_m1.4<1>:UD vme_m1.4<0,1,0>:UD FB_PRUNING_DISABLE:UD {align1}; +mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; + +/* Bilinear filter */ +mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; +add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; + +/* Set the MV cost center */ +mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; +mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; + + +send (8) + vme_msg_ind + vme_wb + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_MESSAGE_TYPE_INTER + ) + mlen vme_msg_length + rlen vme_inter_wb_length + {align1}; + + +cmp.l.f0.0 (1) null:uw vme_wb0.6<0,1,0>:uw tmp_vme_wb0.6<0,1,0>:uw {align1}; +(f0.0) jmpi (1) vme_done; +mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1}; +mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1}; + +vme_done: + jmpi (1) vme_mv_output; +nop; +nop; +nop; + diff --git a/src/shaders/vme/mpeg2_inter_ivb.g7a b/src/shaders/vme/mpeg2_inter_ivb.g7a new file mode 100644 index 0000000..bf0cdb3 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_ivb.g7a @@ -0,0 +1,3 @@ +#include "vme7.inc" +#include "vme7_mpeg2.inc" +#include "mpeg2_inter_ivb.asm" diff --git a/src/shaders/vme/mpeg2_inter_ivb.g7b b/src/shaders/vme/mpeg2_inter_ivb.g7b new file mode 100644 index 0000000..1ffcdd5 --- /dev/null +++ b/src/shaders/vme/mpeg2_inter_ivb.g7b @@ -0,0 +1,308 @@ + { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, +
{ 0x00800001, 0x24800061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, + { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, + { 0x00000041, 0x24880c21, 0x00000488, 0x0000000a }, + { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, + { 0x00200009, 0x2a402d29, 0x00450098, 0x00040004 }, + { 0x00200001, 0x2a500129, 0x00450094, 0x00000000 }, + { 0x00600001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2a800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2ae00061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b200061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x000000f2 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00600060 }, + { 0x00210001, 0x2af401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000022 }, + { 0x00000001, 0x2ae000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24003dad, 0x00000400, 0xffffffff }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 }, + { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 }, + { 0x00310001, 0x2af401ed, 0x00000000, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 }, + { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2ae40021, 0x00450ba0, 0x00000000 }, + { 
0x00000001, 0x2af001ed, 0x00000000, 0x00010001 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00100010 }, + { 0x00210001, 0x2b1401ed, 0x00000000, 0xffffffff }, + { 0x00210001, 0x2b3401ed, 0x00000000, 0xffffffff }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000076 }, + { 0x00000001, 0x2b0000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 }, + { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 }, + { 0x00310001, 0x2b1401ed, 0x00000000, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 }, + { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b040021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b1001ed, 0x00000000, 0x00010001 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00080008 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000026 }, + { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00000040, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x00000040, 0x24003dad, 0x00000400, 0x00010001 }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 }, + { 
0x01000005, 0x20000c20, 0x00000b80, 0x00002000 }, + { 0x00310001, 0x2b3401ed, 0x00000000, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000032 }, + { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 }, + { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000028 }, + { 0x00600001, 0x2b400061, 0x00000000, 0x00000000 }, + { 0x01000005, 0x20002e28, 0x000000a5, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000022 }, + { 0x00000001, 0x2b2000e5, 0x00000000, 0x00000001 }, + { 0x00200001, 0x24000229, 0x004500a0, 0x00000000 }, + { 0x00200040, 0x24003dad, 0x00450400, 0xffffffff }, + { 0x00000041, 0x2b482521, 0x000000a2, 0x00000402 }, + { 0x00000040, 0x2b482421, 0x00000b48, 0x00000400 }, + { 0x00000041, 0x2b480c21, 0x00000b48, 0x0000000a }, + { 0x00000001, 0x2b540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x2b680021, 0x00000b48, 0x00000000 }, + { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000008 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x02180003 }, + { 0x01000005, 0x20000c20, 0x00000b80, 0x00002000 }, + { 0x00310001, 0x2b3401ed, 0x00000000, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 }, + { 0x00000001, 0x2b480021, 0x00000b68, 0x00000000 }, + { 0x0a800031, 0x2ba01cb1, 0x00000b40, 0x02180203 }, + { 0x00200001, 0x2b240021, 0x00450ba0, 0x00000000 }, + { 0x00000001, 0x2b3001ed, 0x00000000, 0x00010001 }, + { 0x00000040, 0x240014a5, 0x00000b00, 0x00000b20 }, + { 0x01000010, 0x20001ca4, 0x00000400, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000010 }, + { 0x02000010, 0x20001ca4, 0x00000ae0, 0x00000000 }, + { 0x00010001, 0x2b040021, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b240021, 0x00000ae4, 0x00000000 }, + { 0x00010001, 0x2b140129, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2b340129, 0x00000af4, 0x00000000 }, + { 0x00010001, 0x2ac00021, 0x00000ae4, 0x00000000 }, + { 
0x00110001, 0x2ac00061, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000032 }, + { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200af4, 0x00000000 }, + { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, + { 0x00010001, 0x24040021, 0x00000ae4, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200b14, 0x00000000 }, + { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, + { 0x00010001, 0x24040021, 0x00000b04, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200b34, 0x00000000 }, + { 0x00010040, 0x24003dad, 0x00200400, 0x00010001 }, + { 0x00010001, 0x24040021, 0x00000b24, 0x00000000 }, + { 0x01000010, 0x20003da4, 0x00200400, 0x00010001 }, + { 0x00010001, 0x2ac00021, 0x00000404, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000018 }, + { 0x00000001, 0x2fa001ad, 0x00000ae4, 0x00000000 }, + { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, + { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000008e }, + { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, + { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, + { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, + { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000082 }, + { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, + { 0x00200001, 0x2a2001ad, 0x00450ae4, 0x00000000 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, + { 0x00000001, 0x2fa00021, 0x00000440, 0x00000000 }, + { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 }, + { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000094 }, + { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, + { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, + { 
0x00600001, 0x28600061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00bb00bb }, + { 0x00000001, 0x28640231, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28900061, 0x00000000, 0x11111111 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00020002 }, + { 0x00000040, 0x24002d29, 0x00000400, 0x00040004 }, + { 0x00000001, 0x247c0231, 0x00000400, 0x00000000 }, + { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x7e201000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, + { 0x00200040, 0x244035ad, 0x00450440, 0x00450a90 }, + { 0x00200040, 0x244435ad, 0x00450444, 0x00450a90 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, + { 0x00000040, 0x24640c21, 0x00000464, 0x00000000 }, + { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00040004 }, + { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 }, + { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 }, + { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 }, + { 0x01000005, 0x20002d28, 0x000000a6, 0x00040004 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000096 }, + { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000012 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d01a0, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, + { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240021, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x28280021, 0x00000544, 0x00000000 }, + { 
0x00000001, 0x282c0021, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28300061, 0x00000000, 0x00000025 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000000a }, + { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240021, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280021, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28300061, 0x00000000, 0x00000035 }, + { 0x00000001, 0x28340021, 0x00000488, 0x00000000 }, + { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 }, + { 0x0a800031, 0x2b801ca1, 0x00000b40, 0x0219e003 }, + { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa4 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000000e }, + { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa0, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000014 }, + { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe401ad, 0x00000fa4, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000000c }, + { 0x04000010, 0x200035ac, 0x00000fa4, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa4, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000006 }, + { 0x04000010, 0x200035ac, 0x00000fa0, 0x00000fa8 }, + { 0x00010001, 0x2fe401ad, 0x00000fa8, 0x00000000 }, + { 0x00110001, 0x2fe401ad, 0x00000fa0, 0x00000000 }, + { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00200040, 0x2f6035ad, 0x00450fa8, 0x00450fa0 }, + { 0x00000040, 0x2f703dad, 0x00000f60, 0x00300030 }, + { 
0x00000040, 0x2f723dad, 0x00000f62, 0x00280028 }, + { 0x00200041, 0x2f903dad, 0x00450fd0, 0xffffffff }, + { 0x00200040, 0x2f8035ad, 0x00450fa8, 0x00450f90 }, + { 0x00200040, 0x2f8435ad, 0x00450fa8, 0x00450fd0 }, + { 0x05000010, 0x200035ac, 0x00000f60, 0x00000f80 }, + { 0x00010001, 0x2f6001ad, 0x00000f80, 0x00000000 }, + { 0x03000010, 0x200035ac, 0x00000f70, 0x00000f84 }, + { 0x00010040, 0x2f603dad, 0x00000f84, 0xffd0ffd0 }, + { 0x05000010, 0x200035ac, 0x00000f62, 0x00000f82 }, + { 0x00010001, 0x2f6201ad, 0x00000f82, 0x00000000 }, + { 0x03000010, 0x200035ac, 0x00000f72, 0x00000f86 }, + { 0x00010040, 0x2f623dad, 0x00000f86, 0xffd8ffd8 }, + { 0x05000010, 0x20003dac, 0x00000f60, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000004 }, + { 0x00010001, 0x2f6001ed, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000006 }, + { 0x03000010, 0x200035ac, 0x00000f70, 0x00000fc0 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000002 }, + { 0x00010040, 0x2f603dad, 0x00000fc0, 0xffd0ffd0 }, + { 0x05000010, 0x20003dac, 0x00000f62, 0x00000000 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000004 }, + { 0x00010001, 0x2f6201ed, 0x00000000, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000004 }, + { 0x03000010, 0x200035ac, 0x00000f72, 0x00000fc2 }, + { 0x00010040, 0x2f623dad, 0x00000fc2, 0xffd8ffd8 }, + { 0x00200041, 0x2f803dad, 0x00450fa8, 0xffffffff }, + { 0x00200040, 0x2fe435ad, 0x00450f60, 0x00450f80 }, + { 0x00000001, 0x34000020, 0x00000fe0, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, + { 0x00200001, 0x240001ad, 0x00450a80, 0x00000000 }, + { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, + { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc }, + { 0x05000010, 0x20003dac, 0x00000400, 0x00000000 }, + { 0x00010041, 0x24003dad, 0x00000400, 0xffffffff }, + { 0x05000010, 0x20003dac, 0x00000402, 0x00000000 }, + { 
0x00010041, 0x24023dad, 0x00000402, 0xffffffff }, + { 0x04000010, 0x20003dac, 0x00000400, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000006 }, + { 0x04000010, 0x20003dac, 0x00000402, 0x00040004 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000002 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000004a }, + { 0x00600001, 0x2c800021, 0x008d0180, 0x00000000 }, + { 0x00600001, 0x2ca00021, 0x008d01a0, 0x00000000 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, + { 0x00000001, 0x2fa80021, 0x00000448, 0x00000000 }, + { 0x00200040, 0x2fa035ad, 0x00450440, 0x00450a90 }, + { 0x00600001, 0x2fc00021, 0x008d0a40, 0x00000000 }, + { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xffffff94 }, + { 0x00200001, 0x244001ad, 0x00450fe4, 0x00000000 }, + { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00020002 }, + { 0x00000040, 0x24002d29, 0x00000400, 0x00040004 }, + { 0x00000001, 0x247c0231, 0x00000400, 0x00000000 }, + { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x7e201000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000001, 0x24440021, 0x00000440, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x24600061, 0x00000000, 0x00000002 }, + { 0x00000001, 0x24640231, 0x0000009c, 0x00000000 }, + { 0x00000040, 0x24640c21, 0x00000464, 0x00000000 }, + { 0x00000001, 0x24680061, 0x00000000, 0x30003030 }, + { 0x00000001, 0x24000169, 0x00000000, 0x00040004 }, + { 0x00000040, 0x247e4631, 0x0000047e, 0x00000400 }, + { 0x00000001, 0x24700021, 0x00000a20, 0x00000000 }, + { 0x00000001, 0x24740021, 0x00000a20, 0x00000000 }, + { 
0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, + { 0x08600031, 0x21801cbd, 0x00000800, 0x0a682000 }, + { 0x05000010, 0x20002528, 0x00000186, 0x00000c86 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000004 }, + { 0x00600001, 0x21800021, 0x008d0c80, 0x00000000 }, + { 0x00600001, 0x21a00021, 0x008d0ca0, 0x00000000 }, + { 0x00000020, 0x34001c00, 0x00001400, 0xffffff04 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/shaders/vme/vme.inc b/src/shaders/vme/vme.inc index dd7e1bb..992c6b9 100644 --- a/src/shaders/vme/vme.inc +++ b/src/shaders/vme/vme.inc @@ -54,6 +54,7 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300') define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700') define(`REF_REGION_SIZE', `0x2830:UW') +define(`MIN_REF_REGION_SIZE', `0x2020:UW') define(`BI_SUB_MB_PART_MASK', `0x0c000000') define(`MAX_NUM_MV', `0x00000020') @@ -140,6 +141,7 @@ define(`orig_y_ub', `inline_reg0.1') define(`transform_8x8_ub', `inline_reg0.4') define(`slice_edge_ub', `inline_reg0.4') define(`num_macroblocks', `inline_reg0.6') +define(`quality_level_ub', `inline_reg0.8') /* * GRF 6~11 -- reserved @@ -277,3 +279,6 @@ define(`vme_msg_4', `msg_reg4') #endif +define(`DEFAULT_QUALITY_LEVEL', `0x01') +define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL') +define(`LOW_QUALITY_LEVEL', `0x02') diff --git a/src/shaders/vme/vme7.inc b/src/shaders/vme/vme7.inc index 3fa99b7..e9d5864 100644 --- a/src/shaders/vme/vme7.inc +++ b/src/shaders/vme/vme7.inc @@ -54,6 +54,8 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300') define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700') define(`REF_REGION_SIZE', `0x2830:UW') +define(`MIN_REF_REGION_SIZE', `0x2020:UW') +define(`DREF_REGION_SIZE', `0x2020:UW') define(`BI_SUB_MB_PART_MASK', `0x0c000000') define(`MAX_NUM_MV', `0x00000020') @@ -132,6 +134,7 @@ define(`orig_y_ub', `inline_reg0.1') define(`transform_8x8_ub', `inline_reg0.4') 
define(`input_mb_intra_ub', `inline_reg0.5') define(`num_macroblocks', `inline_reg0.6') +define(`quality_level_ub', `inline_reg0.7') /* * GRF 6~11 -- reserved @@ -291,7 +294,6 @@ define(`mb_mv3', `r96') define(`mb_ref', `r97') define(`mb_ref_win', `r84') -define(`DREF_REGION_SIZE', `0x2020:UW') define(`PRED_L0', `0x0':uw) define(`PRED_L1', `0x1':uw) define(`PRED_BI', `0x2':uw) @@ -317,3 +319,7 @@ define(`INTER_8X16MODE', `0x02') define(`OBR_MESSAGE_FENCE', `7') define(`OBR_MF_NOCOMMIT', `0') define(`OBR_MF_COMMIT', `0x20') + +define(`DEFAULT_QUALITY_LEVEL', `0x01') +define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL') +define(`LOW_QUALITY_LEVEL', `0x02') diff --git a/src/shaders/vme/vme75.inc b/src/shaders/vme/vme75.inc index be49056..97e814f 100644 --- a/src/shaders/vme/vme75.inc +++ b/src/shaders/vme/vme75.inc @@ -59,6 +59,8 @@ define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300') define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700') define(`REF_REGION_SIZE', `0x2830:UW') +define(`MIN_REF_REGION_SIZE', `0x2020:UW') +define(`DREF_REGION_SIZE', `0x2020:UW') define(`BI_SUB_MB_PART_MASK', `0x0c000000') define(`MAX_NUM_MV', `0x00000020') @@ -133,6 +135,7 @@ define(`orig_y_ub', `inline_reg0.1') define(`transform_8x8_ub', `inline_reg0.4') define(`input_mb_intra_ub', `inline_reg0.5') define(`num_macroblocks', `inline_reg0.6') +define(`quality_level_ub', `inline_reg0.7') /* * GRF 6~11 -- reserved @@ -311,7 +314,6 @@ define(`mb_mv3', `r96') define(`mb_ref', `r97') define(`mb_ref_win', `r84') -define(`DREF_REGION_SIZE', `0x2020:UW') define(`PRED_L0', `0x0':uw) define(`PRED_L1', `0x1':uw) define(`PRED_BI', `0x2':uw) @@ -337,3 +339,7 @@ define(`INTER_8X16MODE', `0x02') define(`OBR_MESSAGE_FENCE', `7') define(`OBR_MF_NOCOMMIT', `0') define(`OBR_MF_COMMIT', `0x20') + +define(`DEFAULT_QUALITY_LEVEL', `0x01') +define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL') +define(`LOW_QUALITY_LEVEL', `0x02') diff --git a/src/shaders/vme/vme75_mpeg2.inc b/src/shaders/vme/vme75_mpeg2.inc index 
9b877ac..b638056 100644 --- a/src/shaders/vme/vme75_mpeg2.inc +++ b/src/shaders/vme/vme75_mpeg2.inc @@ -16,3 +16,18 @@ */ define(`INTER_PART_MASK', `0x7e000000') +define(`mpeg2_ref', `r83') +define(`pic_ref', `r82') +define(`INTRA16_DC_PRED', `0xBB') +/* Cost center ref */ +define(`mv_cc_ref', `r81') +define(`tmp_vme_wb0', `r100') +define(`tmp_vme_wb1', `r101') +define(`tmp_vme_wb2', `r102') +define(`tmp_vme_wb3', `r103') +define(`tmp_vme_wb4', `r104') +define(`tmp_vme_wb5', `r105') +define(`tmp_vme_wb6', `r106') +define(`tmp_vme_wb7', `r107') +define(`tmp_vme_wb8', `r108') +define(`tmp_vme_wb9', `r109') diff --git a/src/shaders/vme/vme7_mpeg2.inc b/src/shaders/vme/vme7_mpeg2.inc index 9b877ac..2d7852a 100644 --- a/src/shaders/vme/vme7_mpeg2.inc +++ b/src/shaders/vme/vme7_mpeg2.inc @@ -16,3 +16,20 @@ */ define(`INTER_PART_MASK', `0x7e000000') +define(`mpeg2_ref', `r83') +define(`pic_ref', `r82') +define(`INTRA16_DC_PRED', `0xBB') + +/* Cost center ref */ +define(`mv_cc_ref', `r81') + +define(`tmp_vme_wb0', `r100') +define(`tmp_vme_wb1', `r101') +define(`tmp_vme_wb2', `r102') +define(`tmp_vme_wb3', `r103') +define(`tmp_vme_wb4', `r104') +define(`tmp_vme_wb5', `r105') +define(`tmp_vme_wb6', `r106') +define(`tmp_vme_wb7', `r107') +define(`tmp_vme_wb8', `r108') +define(`tmp_vme_wb9', `r109') diff --git a/src/shaders/vme/vme8.inc b/src/shaders/vme/vme8.inc new file mode 100644 index 0000000..5b6f469 --- /dev/null +++ b/src/shaders/vme/vme8.inc @@ -0,0 +1,347 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. 
+ * + */ +// Module name: ME_header.inc for Gen8 +// +// Global symbol definitions +// + +/* + * Constant + */ +define(`VME_MESSAGE_TYPE_INTER', `1') +define(`VME_MESSAGE_TYPE_INTRA', `2') +define(`VME_MESSAGE_TYPE_MIXED', `3') + +define(`VME_SIC_MESSAGE_TYPE', `1') +define(`VME_IME_MESSAGE_TYPE', `2') +define(`VME_FBR_MESSAGE_TYPE', `3') + +define(`BLOCK_32X1', `0x0000001F') +define(`BLOCK_4X16', `0x000F0003') +define(`BLOCK_8X4', `0x00070003') + +define(`LUMA_INTRA_16x16_DISABLE', `0x1') +define(`LUMA_INTRA_8x8_DISABLE', `0x2') +define(`LUMA_INTRA_4x4_DISABLE', `0x4') + +define(`INTRA_PRED_AVAIL_FLAG_AE', `0x60') +define(`INTRA_PRED_AVAIL_FLAG_B', `0x10') +define(`INTRA_PRED_AVAIL_FLAG_C', `0x8') +define(`INTRA_PRED_AVAIL_FLAG_D', `0x4') + +define(`BIND_IDX_VME', `0') +define(`BIND_IDX_VME_REF0', `1') +define(`BIND_IDX_VME_REF1', `2') +define(`BIND_IDX_OUTPUT', `3') +define(`BIND_IDX_INEP', `4') + +define(`SUB_PEL_MODE_INTEGER', `0x00000000') +define(`SUB_PEL_MODE_HALF', `0x00001000') +define(`SUB_PEL_MODE_QUARTER', `0x00003000') + +define(`INTER_SAD_NONE', `0x00000000') +define(`INTER_SAD_HAAR', `0x00200000') + +define(`INTRA_SAD_NONE', `0x00000000') +define(`INTRA_SAD_HAAR', `0x00800000') + +define(`INTER_PART_MASK', `0x00000000') + +define(`SEARCH_CTRL_SINGLE', `0x00000000') +define(`SEARCH_CTRL_DUAL_START', `0x00000100') +define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300') +define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700') + +define(`REF_REGION_SIZE', `0x2830:UW') +define(`MIN_REF_REGION_SIZE', `0x2020:UW') +define(`DREF_REGION_SIZE', `0x2020:UW') + +define(`BI_SUB_MB_PART_MASK', `0x0c000000') +define(`MAX_NUM_MV', `0x00000020') +define(`FB_PRUNING_ENABLE', `0x40000000') + +define(`SEARCH_PATH_LEN', `0x00003030') +define(`START_CENTER', `0x30000000') + +define(`ADAPTIVE_SEARCH_ENABLE', `0x00000002') +define(`INTRA_PREDICTORE_MODE', `0x11111111:UD') + +define(`INTER_VME_OUTPUT_IN_OWS', `10') +define(`INTER_VME_OUTPUT_MV_IN_OWS', `8') + +define(`INTRAMBFLAG_MASK', 
`0x00002000') +define(`MVSIZE_UW_BASE', `0x0040') +define(`MFC_MV32_BIT_SHIFT', `5') +define(`CBP_DC_YUV_UW', `0x000E') + +define(`DC_HARR_ENABLE', `0x0000') +define(`DC_HARR_DISABLE', `0x0020') + +define(`MV32_BIT_MASK', `0x0020') +define(`MV32_BIT_SHIFT', `5') + +define(`OBW_CACHE_TYPE', `10') + + +define(`OBW_MESSAGE_TYPE', `8') + +define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT') + +define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */ +define(`OBW_CONTROL_2', `2') /* 2 OWords */ +define(`OBW_CONTROL_3', `3') /* 4 OWords */ +define(`OBW_CONTROL_8', `4') /* 8 OWords */ + +define(`FBR_BME_ENABLE', `0x00000000') +define(`FBR_BME_DISABLE', `0x00040000') + +define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ + + +define(`OBW_HEADER_PRESENT', `1') + +/* GRF registers + * r0 header + * r1~r4 constant buffer (reserved) + * r5 inline data + * r6~r11 reserved + * r12 write back of VME message + * r13 write back of Oword Block Write + */ +/* + * GRF 0 -- header + */ +define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */ + +/* + * GRF 1~4 -- Constant Buffer (reserved) + */ + +/* + * GRF 5 -- inline data + */ +define(`inline_reg0', `r5') +define(`w_in_mb_uw', `inline_reg0.2') +define(`orig_xy_ub', `inline_reg0.0') +define(`orig_x_ub', `inline_reg0.0') /* in macroblock */ +define(`orig_y_ub', `inline_reg0.1') +define(`transform_8x8_ub', `inline_reg0.4') +define(`input_mb_intra_ub', `inline_reg0.5') +define(`num_macroblocks', `inline_reg0.6') +define(`quality_level_ub', `inline_reg0.7') + +/* + * GRF 6~11 -- reserved + */ + +/* + * GRF 12~22 -- write back for VME message + */ +define(`vme_wb', `r12') +define(`vme_wb0', `r12') +define(`vme_wb1', `r13') +define(`vme_wb2', `r14') +define(`vme_wb3', `r15') +define(`vme_wb4', `r16') +define(`vme_wb5', `r17') +define(`vme_wb6', `r18') +define(`vme_ime_wb7', `r19') +define(`vme_ime_wb8', `r20') +define(`vme_ime_wb9', `r21') +define(`vme_ime_wb10', 
`r22') + + +/* + * GRF 24 -- write for VME output message + */ +define(`obw_wb', `null<1>:W') +define(`obw_wb_length', `0') + + +/* + * GRF 28~30 -- Intra Neighbor Edge Pixels + */ +define(`INEP_ROW', `r28') +define(`INEP_COL0', `r29') +define(`INEP_COL1', `r30') + +/* + * GRF 48~50 -- Chroma Neighbor Edge Pixels + */ +define(`CHROMA_ROW', `r48') +define(`CHROMA_COL', `r49') + +/* + * temporary registers + */ +define(`tmp_reg0', `r32') +define(`read0_header', `tmp_reg0') +define(`tmp_reg1', `r33') +define(`read1_header', `tmp_reg1') +define(`tmp_reg2', `r34') +define(`vme_m0', `tmp_reg2') +define(`tmp_reg3', `r35') +define(`vme_m1', `tmp_reg3') +define(`intra_flag', `vme_m1.28') +define(`intra_part_mask_ub', `vme_m1.28') +define(`mb_intra_struct_ub', `vme_m1.29') +define(`tmp_reg4', `r36') +define(`obw_m0', `tmp_reg4') +define(`tmp_reg5', `r37') +define(`obw_m1', `tmp_reg5') +define(`tmp_reg6', `r38') +define(`obw_m2', `tmp_reg6') +define(`tmp_reg7', `r39') +define(`obw_m3', `tmp_reg7') +define(`tmp_reg8', `r40') +define(`obw_m4', `tmp_reg8') +define(`tmp_reg9', `r41') +define(`tmp_x_w', `tmp_reg9.0') +define(`tmp_rega', `r42') +define(`tmp_ud0', `tmp_rega.0') +define(`tmp_ud1', `tmp_rega.4') +define(`tmp_ud2', `tmp_rega.8') +define(`tmp_ud3', `tmp_rega.12') +define(`tmp_uw0', `tmp_rega.0') +define(`tmp_uw1', `tmp_rega.2') +define(`tmp_uw2', `tmp_rega.4') +define(`tmp_uw3', `tmp_rega.6') +define(`tmp_uw4', `tmp_rega.8') +define(`tmp_uw5', `tmp_rega.10') +define(`tmp_uw6', `tmp_rega.12') +define(`tmp_uw7', `tmp_rega.14') + +define(`vme_m2', `r43') +define(`vme_m3', `r44') +/* + * MRF registers + */ + +define(`msg_ind', `64') +define(`msg_reg0', `r64') +define(`msg_reg1', `r65') +define(`msg_reg2', `r66') +define(`msg_reg3', `r67') +define(`msg_reg4', `r68') +define(`msg_reg5', `r69') +define(`msg_reg6', `r70') +define(`msg_reg7', `r71') +define(`msg_reg8', `r72') +define(`msg_reg9', `r73') + +define(`ts_msg_ind', `112') +define(`ts_msg_reg0', `r112') +/* + * VME 
message payload + */ + +define(`vme_intra_wb_length', `1') +define(`vme_wb_length', `7') +define(`sic_vme_msg_length', `8') +define(`fbr_vme_msg_length', `8') +define(`ime_vme_msg_length', `6') + +define(`vme_msg_ind', `msg_ind') +define(`vme_msg_0', `msg_reg0') +define(`vme_msg_1', `msg_reg1') +define(`vme_msg_2', `msg_reg2') + +define(`vme_msg_3', `msg_reg3') +define(`vme_msg_4', `msg_reg4') + + +define(`vme_msg_5', `msg_reg5') +define(`vme_msg_6', `msg_reg6') +define(`vme_msg_7', `msg_reg7') +define(`vme_msg_8', `msg_reg8') +define(`vme_msg_9', `msg_reg9') + +define(`BIND_IDX_CBCR', `6') + + +define(`LUMA_CHROMA_MODE', `0x0') +define(`LUMA_INTRA_MODE', `0x1') +define(`LUMA_INTRA_DISABLE', `0x2') + +define(`RETURN_REG', `r127.0') +define(`RET_ARG', `r127.4') + +/* Now at most two registers are used for input parameters */ +define(`INPUT_ARG0', `r125') +define(`INPUT_ARG1', `r126') + +/* Two temporary registers are used in the function */ +define(`TEMP_VAR0', `r123') +define(`TEMP_VAR1', `r124') + + +define(`OBR_MESSAGE_TYPE', `0') +define(`OBR_CACHE_TYPE', `10') +define(`OBR_BIND_IDX', `BIND_IDX_OUTPUT') + +define(`OBR_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBR_CONTROL_1', `1') /* 1 OWord, high 128 bits */ +define(`OBR_CONTROL_2', `2') /* 2 OWords */ +define(`OBR_CONTROL_4', `3') /* 4 OWords */ +define(`OBR_CONTROL_8', `4') /* 8 OWords */ +define(`OBR_WRITE_COMMIT_CATEGORY', `0') /* category on SNB+ for Data port */ +define(`OBR_HEADER_PRESENT', `1') + +define(`mb_hwdep', `r5.6') +define(`MB_AVAIL', `1:d') +define(`MB_PRED_FLAG', `1:w') + +define(`mb_pred_mode', `r85') +define(`mb_mvp_ref', `r86') +define(`mba_result', `r87') +define(`mbb_result', `r88') +define(`mbc_result', `r89') +define(`mb_ind', `90') +define(`mb_msg0', `r90') +define(`mb_wb', `r91') +define(`mb_intra_wb', `r91') +define(`mb_inter_wb', `r92') +define(`mb_mv0', `r93') +define(`mb_mv1', `r94') +define(`mb_mv2', `r95') +define(`mb_mv3', `r96') +define(`mb_ref', `r97') 
+define(`mb_ref_win', `r84') + +define(`PRED_L0', `0x0':uw) +define(`PRED_L1', `0x1':uw) +define(`PRED_BI', `0x2':uw) +define(`PRED_DIRECT', `0x3':uw) +define(`PRED_MASK', `0x3':uw) + +/* The MAX search len per reference is 16 */ +define(`DSEARCH_PATH_LEN', `0x00001212') +define(`BI_WEIGHT', `0x20':uw) +define(`DSTART_CENTER', `0x00000000') +define(`INTER_MASK', `0x03') +define(`INTER_16X16MODE', `0x0') +define(`INTER_16X8MODE', `0x01') +define(`INTER_8X16MODE', `0x02') +define(`INTER_8X8MODE', `0x03') +define(`INTER_BLOCK0', `0x0') +define(`INTER_BLOCK1', `0x1') +define(`INTER_BLOCK2', `0x2') +define(`INTER_BLOCK3', `0x3') +define(`INTER_16X8MODE', `0x01') +define(`INTER_8X16MODE', `0x02') + + +define(`OBR_MESSAGE_FENCE', `7') +define(`OBR_MF_NOCOMMIT', `0') +define(`OBR_MF_COMMIT', `0x20') + +define(`DEFAULT_QUALITY_LEVEL', `0x01') +define(`HIGH_QUALITY_LEVEL', `DEFAULT_QUALITY_LEVEL') +define(`LOW_QUALITY_LEVEL', `0x02') diff --git a/src/va_backend_compat.h b/src/va_backend_compat.h index f5c9f75..8767153 100644 --- a/src/va_backend_compat.h +++ b/src/va_backend_compat.h @@ -45,4 +45,9 @@ # define VA_DRM_AUTH_CUSTOM VA_DUMMY #endif +#if !VA_CHECK_VERSION(0,35,2) +# define VAProfileH264MultiviewHigh 15 +# define VAProfileH264StereoHigh 16 +#endif + #endif /* VA_BACKEND_COMPAT_H */ |